{
  "openapi": "3.0.1",
  "info": {
    "title": "Website Content Crawler",
    "description": "Crawl websites and extract text content to feed AI models, LLM applications, vector databases, or RAG pipelines. The Actor supports rich formatting using Markdown, cleans the HTML, downloads files, and integrates well with 🦜🔗 LangChain, LlamaIndex, and the wider LLM ecosystem.",
    "version": "0.3",
    "x-build-id": "ag5IWnRTtSxqRFKZg"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/apify~website-content-crawler/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-apify-website-content-crawler",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/apify~website-content-crawler/runs": {
      "post": {
        "operationId": "runs-sync-apify-website-content-crawler",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/apify~website-content-crawler/run-sync": {
      "post": {
        "operationId": "run-sync-apify-website-content-crawler",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "startUrls",
          "proxyConfiguration"
        ],
        "properties": {
          "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "One or more URLs of pages where the crawler will start.\n\nBy default, the Actor will also crawl sub-pages of these URLs.\n\nFor example, for start URL `https://example.com/blog`, it will crawl also `https://example.com/blog/post` or `https://example.com/blog/article`.\n\nThe **Include URL patterns (globs)** option can override this behavior.",
            "items": {
              "type": "object",
              "required": [
                "url"
              ],
              "properties": {
                "url": {
                  "type": "string",
                  "title": "URL of a web page",
                  "format": "uri"
                }
              }
            }
          },
          "crawlerType": {
            "title": "Crawler type",
            "enum": [
              "playwright:adaptive",
              "playwright:firefox",
              "cheerio",
              "jsdom",
              "playwright:chrome"
            ],
            "type": "string",
            "description": "Select the crawling engine:\n- **Adaptive switching** between browser and raw HTTP: Fast and renders JavaScript content if present. Default and recommended option.\n- **Headless browser** (Firefox+Playwright): Reliable, renders JavaScript content, best in avoiding blocking, but might be slow.\n- **Raw HTTP client** (Cheerio): Fastest, but doesn't render JavaScript content.\n- **Raw HTTP client with JavaScript** (JSDOM): Deprecated, use Cheerio instead.\n- **Headless browser** (Chrome+Playwright): Deprecated, use Firefox+Playwright instead.\n\nMore details about Crawler types are in [readme](https://console.apify.com/actors/aYG0l9s7dbB7j3gbS/information/version-0/readme#crawler-types).",
            "default": "playwright:firefox"
          },
          "includeUrlGlobs": {
            "title": "Include URL patterns (globs)",
            "type": "array",
            "description": "Define URL patterns (globs) to extend crawling beyond **Start URLs** and their subpages.\n\nExample: `https://www.example.com/blog/**` matches any blog page — `https://www.example.com/blog/post-title` or `https://www.example.com/blog/category/post` — even if the Start URL is `https://www.example.com/product/some-product`.\n\nIt affects only links found on pages, but not **Start URLs** - if you want to crawl a page, make sure to specify its URL in the **Start URLs** field.\n\nCombined with **Exclude URL patterns**, you can precisely control which pages are crawled.\n\nLearn more about globs [here](https://www.digitalocean.com/community/tools/glob?comments=true&glob=https%3A%2F%2Fexample.com%2Fscrape_this%2F%2A%2A&matches=false&tests=https%3A%2F%2Fexample.com%2Ftools%2F&tests=https%3A%2F%2Fexample.com%2Fscrape_this%2F&tests=https%3A%2F%2Fexample.com%2Fscrape_this%2F123%3Ftest%3Dabc&tests=https%3A%2F%2Fexample.com%2Fdont_scrape_this) and test them with our **Glob tester** under this input.",
            "default": [],
            "items": {
              "type": "object",
              "required": [
                "glob"
              ],
              "properties": {
                "glob": {
                  "type": "string",
                  "title": "Glob of a web page"
                }
              }
            }
          },
          "excludeUrlGlobs": {
            "title": "Exclude URL patterns (globs)",
            "type": "array",
            "description": "Glob patterns matching URLs of pages that will be excluded from crawling. Note that this affects only links found on pages, but not **Start URLs**, which are always crawled. \n\nFor example `https://{store,docs}.example.com/**` excludes all URLs starting with `https://store.example.com/` or `https://docs.example.com/`, and `https://example.com/**/*\\?*foo=*` excludes all URLs that contain `foo` query parameter with any value.\n\nLearn more about globs [here](https://www.digitalocean.com/community/tools/glob?comments=true&glob=https%3A%2F%2Fexample.com%2Fscrape_this%2F%2A%2A&matches=false&tests=https%3A%2F%2Fexample.com%2Ftools%2F&tests=https%3A%2F%2Fexample.com%2Fscrape_this%2F&tests=https%3A%2F%2Fexample.com%2Fscrape_this%2F123%3Ftest%3Dabc&tests=https%3A%2F%2Fexample.com%2Fdont_scrape_this) and test them with our **Glob tester** under this input.",
            "default": [],
            "items": {
              "type": "object",
              "required": [
                "glob"
              ],
              "properties": {
                "glob": {
                  "type": "string",
                  "title": "Glob of a web page"
                }
              }
            }
          },
          "maxCrawlDepth": {
            "title": "Max crawling depth",
            "minimum": 0,
            "type": "integer",
            "description": "The maximum number of links starting from the start URL that the crawler will recursively follow. The start URLs have depth `0`, the pages linked directly from the start URLs have depth `1`, and so on.\n\nUseful to prevent accidental crawler runaway. By setting it to `0`, the Actor will only crawl the Start URLs.",
            "default": 20
          },
          "maxCrawlPages": {
            "title": "Max pages",
            "minimum": 0,
            "type": "integer",
            "description": "The maximum number pages to crawl. It includes the start URLs, pagination pages, pages with no content, etc. The crawler will automatically finish after reaching this number. This setting is useful to prevent accidental crawler runaway.",
            "default": 9999999
          },
          "useSitemaps": {
            "title": "Load URLs from Sitemaps",
            "type": "boolean",
            "description": "If enabled, the crawler will look for [Sitemaps](https://en.wikipedia.org/wiki/Sitemaps) at the domains of the provided *Start URLs* and enqueue matching URLs similarly as the links found on crawled pages.\n\nYou can also reference a `sitemap.xml` file directly by adding it as another Start URL (e.g. `https://www.example.com/sitemap.xml`)\n\nThe crawling could be more robust with Sitemaps, as it includes pages that might be not reachable from Start URLs. However, **loading and processing Sitemaps can take a lot of time, especially for large sites**.\n\nNote that if a page is found via Sitemaps, it will have `depth` of `1`.",
            "default": false
          },
          "useLlmsTxt": {
            "title": "Crawl /llms.txt and Markdown files",
            "type": "boolean",
            "description": "If enabled, the crawler will look for `/llms.txt` files at the root of the domains of the provided Start URLs (e.g., `https://example.com/llms.txt`) and enqueue them for crawling. Note that this also enables crawling other Markdown files and enqueueing links from them.",
            "default": false
          },
          "respectRobotsTxtFile": {
            "title": "Respect the robots.txt file",
            "type": "boolean",
            "description": "If enabled, the crawler will consult the robots.txt file for the target website before crawling each page. At the moment, the crawler does not use any specific user agent identifier. The crawl-delay directive is also not supported yet.",
            "default": false
          },
          "keepUrlFragments": {
            "title": "URL #fragments identify unique pages",
            "type": "boolean",
            "description": "Indicates that URL fragments (e.g. <code>http://example.com<b>#fragment</b></code>) should be included when checking whether a URL has already been visited or not. Typically, URL fragments are used for page navigation only and therefore they should be ignored, as they don't identify separate pages. However, some single-page websites use URL fragments to display different pages; in such a case, this option should be enabled.",
            "default": false
          },
          "ignoreCanonicalUrl": {
            "title": "Ignore canonical URLs",
            "type": "boolean",
            "description": "If enabled, the Actor will ignore the canonical URL or the `ETag` header reported by the page, and use the actual URL instead. You can use this feature for websites that report invalid canonical URLs, which causes the Actor to skip those pages in results.",
            "default": false
          },
          "proxyConfiguration": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Enables loading the websites from IP addresses in specific geographies and to circumvent blocking.",
            "default": {
              "useApifyProxy": true
            }
          },
          "initialCookies": {
            "title": "Custom cookies",
            "type": "array",
            "description": "Cookies that will be pre-set to all pages the scraper opens. This is useful for pages that require login. The value is expected to be a JSON array of objects with `name` and `value` properties. For example: \n\n```json\n[\n  {\n    \"name\": \"cookieName\",\n    \"value\": \"cookieValue\",\n    \"path\": \"/\",\n    \"domain\": \".apify.com\"\n  }\n]\n```\n\nYou can use the [EditThisCookie](https://docs.apify.com/academy/tools/edit-this-cookie) browser extension to copy browser cookies in this format, and paste it here.\n\nNote that the value is secret and encrypted to protect your login cookies."
          },
          "customHttpHeaders": {
            "title": "Custom HTTP headers",
            "type": "object",
            "description": "HTTP headers that will be added to all requests made by the crawler. This is useful for setting custom authentication headers or other headers required by the target website. The value is expected to be a JSON object with `name` and `value` properties pairs. For example: `{ \"name1\": \"value1\", \"Authorization\": \"Basic a1b2c3d4...\" }`.",
            "default": {}
          },
          "signHttpRequests": {
            "title": "Sign HTTP requests (experimental)",
            "type": "boolean",
            "description": "If enabled, the crawler will sign all HTTP requests using its Web Bot Auth private key. This is necessary if you want to use Website Content Crawler as a Cloudflare Signed Agent.",
            "default": false
          },
          "initialConcurrency": {
            "title": "Initial concurrency",
            "minimum": 0,
            "maximum": 999,
            "type": "integer",
            "description": "The initial number of web browsers or HTTP clients running in parallel. The system scales the concurrency up and down based on the current CPU and memory load. If the value is set to 0 (default), the Actor uses the default setting for the specific crawler type.\n\nNote that if you set this value too high, the Actor will run out of memory and crash. If too low, it will be slow at start before it scales the concurrency up.",
            "default": 0
          },
          "maxConcurrency": {
            "title": "Max concurrency",
            "minimum": 1,
            "maximum": 999,
            "type": "integer",
            "description": "The maximum number of web browsers or HTTP clients running in parallel. This setting is useful to avoid overloading the target websites and to avoid getting blocked.",
            "default": 200
          },
          "requestTimeoutSecs": {
            "title": "Page request timeout",
            "minimum": 1,
            "maximum": 600,
            "type": "integer",
            "description": "Timeout in seconds for making the request and processing its response. Defaults to 60s.",
            "default": 60
          },
          "minFileDownloadSpeedKBps": {
            "title": "Minimum file download speed",
            "type": "integer",
            "description": "The minimum viable file download speed in kilobytes per seconds. If the file download speed is lower than this value for a prolonged duration, the crawler will consider the file download as failing, abort it, and retry it again (up to \"Maximum number of retries\" times). This is useful to avoid your crawls being stuck on slow file downloads.",
            "default": 128
          },
          "maxRequestRetries": {
            "title": "Maximum number of retries on network / server errors",
            "minimum": 0,
            "maximum": 20,
            "type": "integer",
            "description": "The maximum number of times the crawler will retry the request on network, proxy or server errors. If the (n+1)-th request still fails, the crawler will mark this request as failed.",
            "default": 3
          },
          "maxSessionRotations": {
            "title": "Maximum number of session rotations",
            "minimum": 0,
            "maximum": 20,
            "type": "integer",
            "description": "The maximum number of times the crawler will rotate the session (IP address + browser configuration) on anti-scraping measures like CAPTCHAs. If the crawler rotates the session more than this number and the page is still blocked, it will finish with an error.",
            "default": 10
          },
          "ignoreHttpsErrors": {
            "title": "Ignore HTTPS errors",
            "type": "boolean",
            "description": "If enabled, the scraper will ignore HTTPS certificate errors. Use at your own risk.",
            "default": false
          },
          "dynamicContentWaitSecs": {
            "title": "Wait for dynamic content",
            "type": "integer",
            "description": "The maximum time in seconds to wait for dynamic page content to load. By default, it is 10 seconds. The crawler will continue processing the page either if this time elapses, or if it detects the network became idle as there are no more requests for additional resources.\n\nWhen using the **Wait for selector** option, the crawler will wait for the selector to appear for this amount of time. If the selector doesn't appear within this period, the request will fail and will be retried.\n\nNote that this setting is ignored for the raw HTTP client, because it doesn't execute JavaScript or loads any dynamic resources. Similarly, if the value is set to `0`, the crawler doesn't wait for any dynamic to load and processes the HTML as provided on load.",
            "default": 10
          },
          "waitForSelector": {
            "title": "Wait for selector",
            "type": "string",
            "description": "Specify a **CSS selector** to tell the crawler to wait for a specific element to appear before it starts extracting content. This is helpful for pages where the content loads dynamically.\n\nExamples: `div`, `#id-of-an-element`, `.class-name`\n\nThis setting disables the default content-load detection. If the element doesn't appear within the **Wait for dynamic content** timeout, the request will fail and be retried.\n\nWith the raw HTTP client, this option checks for the presence of the selector in the HTML content and throws an error if it's not found.",
            "default": ""
          },
          "softWaitForSelector": {
            "title": "Soft wait for selector",
            "type": "string",
            "description": "If set, the crawler will wait for the specified CSS selector to appear in the page before proceeding with the content extraction. Unlike the `waitForSelector` option, this option doesn't fail the request if the selector doesn't appear within the timeout (the request processing will continue).",
            "default": ""
          },
          "maxScrollHeightPixels": {
            "title": "Maximum scroll height",
            "minimum": 0,
            "type": "integer",
            "description": "The crawler will scroll down the page until all content is loaded (and network becomes idle), or until this maximum scrolling height is reached. Setting this value to `0` disables scrolling altogether.\n\nNote that this setting is ignored for the raw HTTP client, because it doesn't execute JavaScript or loads any dynamic resources.",
            "default": 5000
          },
          "removeCookieWarnings": {
            "title": "Remove cookie warnings",
            "type": "boolean",
            "description": "If enabled, the Actor will try to remove cookies consent dialogs or modals, using the [I don't care about cookies](https://addons.mozilla.org/en-US/firefox/addon/i-dont-care-about-cookies/) browser extension, to improve the accuracy of the extracted text. Note that there is a small performance penalty if this feature is enabled.\n\nThis setting is ignored when using the raw HTTP crawler type.",
            "default": true
          },
          "blockMedia": {
            "title": "Block loading of images and videos",
            "type": "boolean",
            "description": "If the flag is enabled and the Actor is using a headless browser, it will not load images, fonts, stylesheets and videos to improve performance. It will load scripts as usual - that is after all the point of using a headless browser.",
            "default": false
          },
          "expandIframes": {
            "title": "Expand iframe elements",
            "type": "boolean",
            "description": "By default, the Actor will extract content from `iframe` elements. If you want to specifically skip `iframe` processing, disable this option. Works only for the `playwright:firefox` crawler type.",
            "default": true
          },
          "clickElementsCssSelector": {
            "title": "Expand clickable elements",
            "type": "string",
            "description": "A CSS selector matching DOM elements that will be clicked. This is useful for expanding collapsed sections, in order to capture their text content. The value must be a valid CSS selector as accepted by the `document.querySelectorAll()` function. ",
            "default": "[aria-expanded=\"false\"]"
          },
          "stickyContainerCssSelector": {
            "title": "Make containers sticky",
            "type": "string",
            "description": "This is an **experimental** feature. A CSS selector matching DOM elements that will be prevented from deleting any of their children. This is useful in conjunction with the \"Expand clickable elements\" option on pages where hidden content is actually removed from the DOM (i.e., some variants of the accordion pattern). Enabling this might corrupt the extracted content, which is why it is disabled by default. It is possible to enable the feature for the whole page with the `*` selector, or you can target specific elements if the former has unwanted side effects."
          },
          "pageFunction": {
            "title": "Page function",
            "type": "string",
            "description": "A declaration of an asynchronous JS function (e.g. `async function pageFunction({ page }) { await page.click('.submit-button') }`).\n\nThe function receives `context` as the only argument. Context is a JavaScript object containing the following properties:\n- `page`: Currently loaded Playwright `Page` instance.\n- `request`: The request object that triggered the page load.\n\nThe function will be executed in the browser context for each crawled page, after the page is loaded (included all dynamic content) and before the content is extracted and cleaned.",
            "default": ""
          },
          "keepElementsCssSelector": {
            "title": "Keep HTML elements (CSS selector)",
            "type": "string",
            "description": "Extract only relevant page content by specifying CSS selectors (e.g. `div`, `#element-id`, `.class-name`). [Learn more about CSS selectors](https://developer.mozilla.org/en-US/docs/Learn_web_development/Core/Styling_basics/Basic_selectors).\n\nIf any selectors are defined, everything else will be removed from the page.\n\nThis option runs before the `HTML transformer` option. If you are missing content in the output despite using this option, try disabling the `HTML transformer`.",
            "default": ""
          },
          "removeElementsCssSelector": {
            "title": "Remove HTML elements (CSS selector)",
            "type": "string",
            "description": "Specify which HTML elements should be removed from the page before text extraction. This is useful to skip irrelevant page content.\n\nBy default, the Actor removes common navigation elements, headers, footers, modals, scripts, and inline image. You can disable the removal by setting this value to some non-existent CSS selector like `dummy_keep_everything`.",
            "default": "nav, footer, script, style, noscript, svg, img[src^='data:'],\n[role=\"alert\"],\n[role=\"banner\"],\n[role=\"dialog\"],\n[role=\"alertdialog\"],\n[role=\"region\"][aria-label*=\"skip\" i],\n[aria-modal=\"true\"]"
          },
          "htmlTransformer": {
            "title": "HTML transformer",
            "enum": [
              "readableTextIfPossible",
              "readableText",
              "extractus",
              "defuddle",
              "none"
            ],
            "type": "string",
            "description": "Specify how to transform HTML to get meaningful content, removing extra fluff like navigation or pop-ups. This is applied after any HTML elements are removed or clicked.\n\n- **Readable text with fallback**: Uses Mozilla's Readability to extract content, but keeps the original HTML if it's not a clear article. Great for sites with mixed content like articles and product pages.\n\n- **Readable text** (Default): Also uses Mozilla's Readability but is more aggressive, removing headers, footers, and navigation. Best for blogs and article-heavy sites.\n\n- **Extractus**: An alternative content extraction algorithm that might work better for certain news sites or blogs with unique layouts.\n\n- **Defuddle**: More forgiving than Readability, better preserving elements like math and footnotes, code. It also extracts metadata and uses mobile styles for clean-up.\n\n- **None**: Only performs basic cleaning and removes elements specified by you. This option is best when you need to preserve most of the page's original HTML.",
            "default": "readableText"
          },
          "readableTextCharThreshold": {
            "title": "Readable text extractor character threshold",
            "type": "integer",
            "description": "A configuration options for the \"Readable text\" HTML transformer. It contains the minimum number of characters an article must have in order to be considered relevant.",
            "default": 100
          },
          "aggressivePrune": {
            "title": "Remove duplicate text lines",
            "type": "boolean",
            "description": "This is an **experimental feature**. If enabled, the crawler will prune content lines that are very similar to the ones already crawled on other pages, using the Count-Min Sketch algorithm. This is useful to strip repeating content in the scraped data like menus, headers, footers, etc. In some (not very likely) cases, it might remove relevant content from some pages.",
            "default": false
          },
          "debugMode": {
            "title": "Debug mode (stores output of all HTML transformers)",
            "type": "boolean",
            "description": "If enabled, the Actor will store the output of all types of HTML transformers, including the ones that are not used by default, and it will also store the HTML to Key-value Store with a link. All this data is stored under the `debug` field in the resulting Dataset.",
            "default": false
          },
          "debugLog": {
            "title": "Debug log",
            "type": "boolean",
            "description": "If enabled, the actor log will include debug messages. Beware that this can be quite verbose.",
            "default": false
          },
          "storeSkippedUrls": {
            "title": "Store skipped URLs",
            "type": "boolean",
            "description": "If enabled, the crawler will store all URLs that were skipped during the crawl in a Key-Value Store record named `SKIPPED_URLS`. The record will contain a JSON object with reasons for skipping and the URLs that were skipped for each reason. This is useful for debugging and understanding why certain pages were not crawled.",
            "default": false
          },
          "saveHtml": {
            "title": "Save HTML to dataset (deprecated)",
            "type": "boolean",
            "description": "If enabled, the crawler stores full transformed HTML of all pages found to the output dataset under the `html` field. **This option has been deprecated** in favor of the `saveHtmlAsFile` option, because the dataset records have a size of approximately 10MB and it's harder to review the HTML for debugging.",
            "default": false
          },
          "saveHtmlAsFile": {
            "title": "Save HTML to key-value store",
            "type": "boolean",
            "description": "If enabled, the crawler stores full transformed HTML of all pages found to the default key-value store and saves links to the files as `htmlUrl` field in the output dataset. Storing HTML in key-value store is preferred to storing it into the dataset with the `saveHtml` option, because there's no size limit and it's easier for debugging as you can easily view the HTML.",
            "default": false
          },
          "saveMarkdown": {
            "title": "Save Markdown",
            "type": "boolean",
            "description": "If enabled, the crawler converts the transformed HTML of all pages found to Markdown, and stores it under the `markdown` field in the output dataset.",
            "default": true
          },
          "saveFiles": {
            "title": "Save files",
            "type": "boolean",
            "description": "Deprecated in favor of the `saveContentTypes` option. Will be removed soon. If enabled, the crawler downloads files linked from the web pages, as long as their URL has one of the following file extensions: PDF, DOC, DOCX, XLS, XLSX, and CSV. Note that unlike web pages, the files are downloaded regardless if they are under **Start URLs** or not. The files are stored to the default key-value store, and metadata about them to the output dataset, similarly as for web pages.",
            "default": false
          },
          "saveContentTypes": {
            "title": "Save linked files with Content-Type",
            "type": "string",
            "description": "The crawler downloads files linked from the web pages, as long as their content type matches the provided value. Select predefined <a href=\"https://www.iana.org/assignments/media-types/media-types.xhtml\">Content-type</a> groups to download common file types, or enter custom HTTP Content-type strings, including wildcards (e.g., application/pdf, text/\\*, image/\\*) for specific downloads. Note that unlike web pages, the files are downloaded regardless if they are under **Start URLs** or not. The files are stored to the default key-value store, and metadata about them to the output dataset, similarly as for web pages."
          },
          "saveScreenshots": {
            "title": "Save screenshots (headless browser only)",
            "type": "boolean",
            "description": "If enabled, the crawler stores a screenshot for each article page to the default key-value store. The link to the screenshot is stored under the `screenshotUrl` field in the output dataset. It is useful for debugging, but reduces performance and increases storage costs.\n\nNote that this feature only works with the `playwright:firefox` crawler type.",
            "default": false
          },
          "maxResults": {
            "title": "Max results",
            "minimum": 0,
            "type": "integer",
            "description": "The maximum number of web pages and files to store. This setting helps prevent an accidental crawler runaway by automatically stopping the crawl once this limit is reached. Note that the crawler skips pages whose canonical URL matches a page that has already been crawled, so it may crawl more pages than the number of stored results. Similarly, there may be more stored results than crawled web pages because downloaded files also count toward results.",
            "default": 9999999
          },
          "clientSideMinChangePercentage": {
            "title": "(Adaptive crawling only) Minimum client-side content change percentage",
            "minimum": 1,
            "type": "integer",
            "description": "The minimum amount of content change (as a percentage) after the initial load required to consider a page client-side rendered",
            "default": 15
          },
          "renderingTypeDetectionPercentage": {
            "title": "(Adaptive crawling only) How often should the crawler attempt to detect page rendering type",
            "minimum": 1,
            "maximum": 100,
            "type": "integer",
            "description": "How often the adaptive crawler should attempt to detect the page rendering type",
            "default": 10
          },
          "reuseStoredDetectionResults": {
            "title": "Reuse stored detection results (experimental)",
            "type": "boolean",
            "description": "If enabled, the crawler (if using playwright:adaptive) will reuse results of rendering type detections done in previous runs to speed up crawling of statically rendered pages",
            "default": false
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}