## ----include = FALSE----------------------------------------------------------
# Vignette-wide knitr chunk defaults: collapse source and output into a single
# block, prefix printed output with "#>", and leave every chunk unevaluated.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----setup--------------------------------------------------------------------
# Attaches the crawlee package that the later examples call into.
# NOTE(review): presumably left commented out by knitr::purl() because the
# chunk defaults at the top of the file set eval = FALSE -- confirm.
# library(crawlee)

## -----------------------------------------------------------------------------
# Example: build a crawler for books.toscrape.com, pass explicit settings via
# cr_options(), register an HTML handler that calls ctx$enqueue_links(), and
# start it with cr_run().
# NOTE(review): apart from `delay` (annotated inline as seconds between
# requests), the meaning of each cr_options() argument is inferred from its
# name only -- verify against the crawlee documentation.
# crawler("https://books.toscrape.com/") |>
#   cr_options(
#     user_agent = "my-research-bot (you@example.com)",
#     delay = 0.5, # seconds between requests
#     max_requests = 500,
#     max_depth = 4,
#     respect_robots = TRUE
#   ) |>
#   cr_on_html(function(ctx) ctx$enqueue_links()) |>
#   cr_run()

## -----------------------------------------------------------------------------
# Example: crawler pipeline with cr_parallel(concurrency = 8) inserted before
# the HTML handler is registered.
# NOTE(review): presumably this allows up to 8 requests in flight at once --
# confirm against the cr_parallel() documentation.
# crawler("https://books.toscrape.com/") |>
#   cr_parallel(concurrency = 8) |>
#   cr_on_html(function(ctx) ctx$enqueue_links()) |>
#   cr_run()

## -----------------------------------------------------------------------------
# Example: crawler pipeline with cr_autoscale(min = 2, max = 16) inserted
# before the HTML handler is registered.
# NOTE(review): presumably min/max bound an automatically adjusted concurrency
# level -- confirm against the cr_autoscale() documentation.
# crawler("https://books.toscrape.com/") |>
#   cr_autoscale(min = 2, max = 16) |>
#   cr_on_html(function(ctx) ctx$enqueue_links()) |>
#   cr_run()

## -----------------------------------------------------------------------------
# Example: crawler pipeline with cr_stream(concurrency = 10) inserted before
# the HTML handler is registered.
# NOTE(review): how cr_stream() differs from the other concurrency helpers is
# not visible in this file -- see the crawlee documentation.
# crawler("https://books.toscrape.com/") |>
#   cr_stream(concurrency = 10) |>
#   cr_on_html(function(ctx) ctx$enqueue_links()) |>
#   cr_run()

## -----------------------------------------------------------------------------
# Example: crawler pipeline combining cr_autoscale(min = 2, max = 16) with
# cr_persist("runs/books", dataset = "duckdb") before the HTML handler.
# NOTE(review): presumably "runs/books" is an on-disk location for run state
# and `dataset = "duckdb"` selects a DuckDB-backed store -- confirm against
# the cr_persist() documentation.
# crawler("https://books.toscrape.com/") |>
#   cr_autoscale(min = 2, max = 16) |>
#   cr_persist("runs/books", dataset = "duckdb") |>
#   cr_on_html(function(ctx) ctx$enqueue_links()) |>
#   cr_run()

