{
  "openapi": "3.0.1",
  "info": {
    "title": "Modern Web Crawler — Adaptive + Stealth + Analytics",
    "description": "Modern replacement for the Legacy PhantomJS Crawler. Auto HTTP/Browser detection, basic anti-bot stealth, built-in analytics, data quality scoring, captcha solver integration. Modern Chrome + Cheerio engine — no PhantomJS, no abandoned tech. Proxies included.",
    "version": "1.0",
    "x-build-id": "Po3YsV8GFzk60uHkx"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/brilliant_gum~phantom-reborn-crawler/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-brilliant_gum-phantom-reborn-crawler",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/brilliant_gum~phantom-reborn-crawler/runs": {
      "post": {
        "operationId": "runs-sync-brilliant_gum-phantom-reborn-crawler",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/brilliant_gum~phantom-reborn-crawler/run-sync": {
      "post": {
        "operationId": "run-sync-brilliant_gum-phantom-reborn-crawler",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT record from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "startUrls"
        ],
        "properties": {
          "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start crawling from. Supports GET/POST, custom headers, labels, and userData per URL.",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string",
                  "title": "URL",
                  "description": "Page URL"
                }
              }
            }
          },
          "linkSelector": {
            "title": "Link Selector",
            "type": "string",
            "description": "CSS selector to find links on the page. Empty string = scrape only start URLs (no link following).",
            "default": "a[href]"
          },
          "globs": {
            "title": "URL Patterns (Globs)",
            "type": "array",
            "description": "Glob patterns to filter which discovered URLs to crawl. Only matching URLs will be enqueued. Example: https://example.com/products/**",
            "items": {
              "type": "object"
            },
            "default": []
          },
          "excludeGlobs": {
            "title": "Exclude URL Patterns",
            "type": "array",
            "description": "Glob patterns for URLs to exclude from crawling.",
            "items": {
              "type": "object"
            },
            "default": []
          },
          "pseudoUrls": {
            "title": "Pseudo-URLs (Legacy)",
            "type": "array",
            "description": "Legacy pseudo-URL patterns with regex in square brackets. For backward compatibility — prefer Globs for new projects.",
            "items": {
              "type": "object"
            },
            "default": []
          },
          "keepUrlFragments": {
            "title": "Keep URL Fragments",
            "type": "boolean",
            "description": "Treat URL #fragments as unique pages (important for SPAs with hash-based routing).",
            "default": false
          },
          "pageFunction": {
            "title": "Page Function",
            "type": "string",
            "description": "Custom JavaScript function executed on each page. Receives context with: { page, $, body, json, request, response, log, enqueueLinks, pushData, getValue, setValue, customData, crawler }. Return extracted data or use pushData()."
          },
          "preNavigationHooks": {
            "title": "Pre-Navigation Hooks",
            "type": "string",
            "description": "JavaScript code executed BEFORE each page navigation. Use to set cookies, modify headers, or prepare the page. Available variables: { request, page, session, proxyInfo, log }.",
            "default": ""
          },
          "postNavigationHooks": {
            "title": "Post-Navigation Hooks",
            "type": "string",
            "description": "JavaScript code executed AFTER each page navigation, before the page function. Use to check page state, dismiss popups, or handle auth flows.",
            "default": ""
          },
          "crawlerMode": {
            "title": "Crawler Mode",
            "enum": [
              "adaptive",
              "browser",
              "http"
            ],
            "type": "string",
            "description": "'adaptive' auto-detects JS-heavy pages. 'browser' always uses Playwright. 'http' uses fast HTTP (Cheerio, no JS rendering).",
            "default": "adaptive"
          },
          "headless": {
            "title": "Headless Mode",
            "type": "boolean",
            "description": "Run browser in headless mode. Disable for debugging or sites that detect headless browsers.",
            "default": true
          },
          "useChrome": {
            "title": "Use Chrome (not Chromium)",
            "type": "boolean",
            "description": "Use the real Google Chrome browser instead of Chromium. Better anti-detection but slower startup.",
            "default": false
          },
          "waitUntil": {
            "title": "Wait Until",
            "enum": [
              "domcontentloaded",
              "load",
              "networkidle"
            ],
            "type": "string",
            "description": "When to consider the page loaded. 'networkidle' waits for no network activity (slowest but safest). 'domcontentloaded' is faster.",
            "default": "domcontentloaded"
          },
          "waitForSelectorOrTimeout": {
            "title": "Additional Wait (Selector or ms)",
            "type": "string",
            "description": "Extra wait after page load: either a CSS selector to wait for, or a number of milliseconds. Examples: '.product-card', '3000'",
            "default": ""
          },
          "maxCrawlDepth": {
            "title": "Max Crawl Depth",
            "minimum": 0,
            "maximum": 100,
            "type": "integer",
            "description": "Max link distance from start URLs. 0 = only start URLs.",
            "default": 10
          },
          "maxCrawlPages": {
            "title": "Max Pages to Crawl",
            "minimum": 1,
            "maximum": 10000000,
            "type": "integer",
            "description": "Maximum total pages to crawl (including pages that don't produce output).",
            "default": 100
          },
          "maxResultsPerCrawl": {
            "title": "Max Output Results",
            "minimum": 0,
            "maximum": 10000000,
            "type": "integer",
            "description": "Maximum number of records to save to the dataset. 0 = unlimited. Independent of the max pages limit — e.g. crawl 1000 pages but output only 100 results.",
            "default": 0
          },
          "maxConcurrency": {
            "title": "Max Concurrency",
            "minimum": 1,
            "maximum": 200,
            "type": "integer",
            "description": "Maximum parallel pages. Lower = safer for anti-bot, higher = faster.",
            "default": 10
          },
          "maxRequestRetries": {
            "title": "Max Retries",
            "minimum": 0,
            "maximum": 10,
            "type": "integer",
            "description": "How many times to retry a failed page before giving up.",
            "default": 3
          },
          "pageLoadTimeoutSecs": {
            "title": "Page Load Timeout (sec)",
            "minimum": 1,
            "maximum": 300,
            "type": "integer",
            "description": "Maximum time to wait for a page to load.",
            "default": 60
          },
          "pageFunctionTimeoutSecs": {
            "title": "Page Function Timeout (sec)",
            "minimum": 1,
            "maximum": 3600,
            "type": "integer",
            "description": "Maximum time for the page function to complete. Separate from page load timeout. Increase for heavy extraction.",
            "default": 60
          },
          "maxInfiniteScrollHeight": {
            "title": "Max Scroll Height (px)",
            "minimum": 0,
            "maximum": 1000000,
            "type": "integer",
            "description": "Scroll down this many pixels for infinite scroll pages. 0 = no scrolling. Browser mode only.",
            "default": 0
          },
          "closeCookieModals": {
            "title": "Close Cookie Modals",
            "type": "boolean",
            "description": "Automatically dismiss GDPR cookie consent banners and popups. Browser mode only.",
            "default": false
          },
          "delayBetweenRequestsMs": {
            "title": "Delay Between Requests (ms)",
            "minimum": 0,
            "maximum": 60000,
            "type": "integer",
            "description": "Base delay between requests, in milliseconds. The actual delay is randomized by ±25% for human-like behavior.",
            "default": 500
          },
          "blockResources": {
            "title": "Block Resources",
            "type": "array",
            "description": "Block unnecessary resources to speed up crawling (browser mode). Valid values: image, stylesheet, font, media, script, other.",
            "items": {
              "type": "string"
            },
            "default": []
          },
          "ignoreSslErrors": {
            "title": "Ignore SSL Errors",
            "type": "boolean",
            "description": "Continue crawling when SSL certificate verification fails.",
            "default": false
          },
          "ignoreRobotsTxt": {
            "title": "Ignore robots.txt",
            "type": "boolean",
            "description": "Ignore robots.txt rules. Use responsibly and check legal requirements.",
            "default": false
          },
          "ignoreCorsAndCsp": {
            "title": "Ignore CORS and CSP",
            "type": "boolean",
            "description": "Bypass Cross-Origin Resource Sharing and Content Security Policy restrictions. Allows cross-domain XHR from page function.",
            "default": false
          },
          "stealthMode": {
            "title": "Stealth Mode",
            "type": "boolean",
            "description": "Anti-detection: fingerprint randomization, realistic headers, webdriver flag masking. Recommended for sites with bot protection.",
            "default": true
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration (optional)",
            "type": "object",
            "description": "Optional. Proxies are included and configured automatically — leave this empty. Override only if you want to use your own proxy URLs or a specific country."
          },
          "proxyRotation": {
            "title": "Proxy Rotation Strategy",
            "enum": [
              "recommended",
              "per_request",
              "until_failure"
            ],
            "type": "string",
            "description": "'recommended' rotates proxies automatically. 'per_request' uses a new proxy for each request. 'until_failure' keeps the same proxy until it fails.",
            "default": "recommended"
          },
          "sessionPoolOptions": {
            "title": "Use Session Pool",
            "type": "boolean",
            "description": "Maintain persistent sessions (cookies + proxy pairs) for consistent browsing.",
            "default": true
          },
          "sessionPoolName": {
            "title": "Session Pool Name",
            "type": "string",
            "description": "Name for the session pool. Named pools can be shared across actor runs for persistent sessions.",
            "default": ""
          },
          "maxSessionUsageCount": {
            "title": "Max Uses Per Session",
            "minimum": 1,
            "maximum": 1000,
            "type": "integer",
            "description": "Retire a session after this many requests. Lower values reduce detection risk.",
            "default": 50
          },
          "captchaSolverApiKey": {
            "title": "Captcha Solver API Key (2captcha)",
            "type": "string",
            "description": "Your 2captcha.com API key. When set, the crawler automatically detects and solves Cloudflare Turnstile and DataDome captchas. Leave empty to disable captcha solving."
          },
          "captchaSolverTimeoutSecs": {
            "title": "Captcha Solver Timeout (sec)",
            "minimum": 30,
            "maximum": 300,
            "type": "integer",
            "description": "Maximum time to wait for captcha solution from 2captcha.",
            "default": 120
          },
          "captchaMaxRetries": {
            "title": "Captcha Max Retries",
            "minimum": 1,
            "maximum": 5,
            "type": "integer",
            "description": "Maximum captcha solve attempts per page before giving up.",
            "default": 2
          },
          "customHeaders": {
            "title": "Custom HTTP Headers",
            "type": "object",
            "description": "Custom HTTP headers sent with every request. Example: {\"Accept-Language\": \"de-DE\"}",
            "default": {}
          },
          "initialCookies": {
            "title": "Initial Cookies",
            "type": "array",
            "description": "Cookies to set before crawling. Array of {name, value, domain} objects.",
            "items": {
              "type": "object"
            },
            "default": []
          },
          "customData": {
            "title": "Custom Data",
            "type": "object",
            "description": "Arbitrary JSON passed to page function as context.customData.",
            "default": {}
          },
          "extractMetadata": {
            "title": "Auto-Extract Page Metadata",
            "type": "boolean",
            "description": "Automatically extract Open Graph, Twitter Cards, JSON-LD / Schema.org structured data, canonical URL, and SEO metadata from every page.",
            "default": true
          },
          "extractContacts": {
            "title": "Auto-Extract Contact Info",
            "type": "boolean",
            "description": "Automatically find and extract email addresses, phone numbers, and social media links (Twitter, LinkedIn, Facebook, Instagram, YouTube, GitHub) from page content.",
            "default": false
          },
          "extractCleanText": {
            "title": "Auto-Extract Clean Text",
            "type": "boolean",
            "description": "Extract the main content text from each page, stripping navigation, ads, sidebars, and boilerplate. Like 'Reader Mode' in browsers. Great for LLM/RAG pipelines.",
            "default": false
          },
          "saveScreenshotPerPage": {
            "title": "Save Screenshot Per Page",
            "type": "boolean",
            "description": "Capture a screenshot of every crawled page (browser mode only). Saved to key-value store. Useful for visual monitoring, archiving, and debugging.",
            "default": false
          },
          "enableAnalytics": {
            "title": "Enable Analytics Report",
            "type": "boolean",
            "description": "Generate detailed crawl report: success rates, response time percentiles, error breakdown, data quality metrics, crawl speed. Saved to key-value store as ANALYTICS_REPORT.",
            "default": true
          },
          "dataQualityChecks": {
            "title": "Data Quality Checks",
            "type": "boolean",
            "description": "Validate extracted data quality. Warns about empty fields, encoding issues, suspiciously short content.",
            "default": true
          },
          "requiredFields": {
            "title": "Required Fields",
            "type": "array",
            "description": "Fields that must be non-empty in pageFunctionResult. Items failing validation are flagged in analytics.",
            "items": {
              "type": "string"
            },
            "default": []
          },
          "saveSnapshots": {
            "title": "Save Snapshots on Error",
            "type": "boolean",
            "description": "Save HTML snapshots and screenshots when errors occur. Useful for debugging anti-bot blocks.",
            "default": true
          },
          "debugLog": {
            "title": "Debug Log",
            "type": "boolean",
            "description": "Enable verbose debug logging. Warning: significantly increases log volume.",
            "default": false
          },
          "browserLog": {
            "title": "Browser Console Log",
            "type": "boolean",
            "description": "Include browser console.log messages in the actor log. Useful for debugging page function.",
            "default": false
          },
          "datasetName": {
            "title": "Dataset Name",
            "type": "string",
            "description": "Store output in a named dataset instead of the default one. Useful for multi-step workflows.",
            "default": ""
          },
          "keyValueStoreName": {
            "title": "Key-Value Store Name",
            "type": "string",
            "description": "Use a named key-value store for screenshots and analytics. Useful for multi-step workflows.",
            "default": ""
          },
          "requestQueueName": {
            "title": "Request Queue Name",
            "type": "string",
            "description": "Use a named request queue. Useful for resuming crawls or sharing URLs between runs.",
            "default": ""
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}