{
  "openapi": "3.0.1",
  "info": {
    "title": "Universal Web Scraper - Extract Any URL",
    "description": "Pay-per-result web scraper with JS rendering, CSS selector / XPath / regex extraction, schema validation, retry on failure. Use for product catalogs, competitor pricing, news aggregation, lead generation. Fast (<2s/page), respects robots.txt by default.",
    "version": "1.0",
    "x-build-id": "Vq3oc9mhkcDnln8wG"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/lazymac~web-scraper-toolkit/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-lazymac-web-scraper-toolkit",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/lazymac~web-scraper-toolkit/runs": {
      "post": {
        "operationId": "runs-sync-lazymac-web-scraper-toolkit",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/lazymac~web-scraper-toolkit/run-sync": {
      "post": {
        "operationId": "run-sync-lazymac-web-scraper-toolkit",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "urls"
        ],
        "properties": {
          "urls": {
            "title": "URLs to Scrape",
            "type": "array",
            "description": "JSON array of URLs to scrape (maximum 10 per run). Each URL must be publicly accessible. Example: [\"https://example.com\", \"https://github.com\"]. Each URL is processed independently — if one fails, the others still succeed."
          },
          "mode": {
            "title": "Extraction Mode",
            "enum": [
              "full",
              "metadata",
              "links",
              "headlines",
              "images",
              "tables",
              "text",
              "custom"
            ],
            "type": "string",
            "description": "What data to extract from each page. 'full' returns metadata + headlines + links + images + tables. 'metadata' returns title, description, OG tags, canonical, language. 'links' returns all unique links with anchor text. 'headlines' returns H1, H2, H3 headings. 'images' returns all images with alt text. 'tables' parses HTML tables into structured rows. 'text' returns clean body text (no scripts/styles/nav/footer). 'custom' extracts elements matching a CSS selector.",
            "default": "full"
          },
          "selector": {
            "title": "CSS Selector (for Custom Mode)",
            "type": "string",
            "description": "CSS selector for custom mode extraction. Supports all standard CSS selectors: element (div, p), class (.class-name), ID (#id), attribute ([href]), combinators (div > p, ul li), pseudo-classes (:first-child, :nth-of-type(2)). Only used when mode is 'custom'. Examples: '.article-title', '#main-content p', 'table.data-table tr td'."
          },
          "timeout": {
            "title": "Request Timeout (ms)",
            "minimum": 1000,
            "maximum": 60000,
            "type": "integer",
            "description": "Maximum time in milliseconds to wait for each URL to respond. Default is 15000 (15 seconds). Increase for slow servers or large pages. Each URL is timed independently.",
            "default": 15000
          },
          "maxLinksPerPage": {
            "title": "Max Links Per Page",
            "minimum": 1,
            "maximum": 5000,
            "type": "integer",
            "description": "Maximum number of links to extract per page in 'links' and 'full' modes. Default is 500; allowed range is 1 to 5000. Set a lower value to limit output size.",
            "default": 500
          },
          "maxImagesPerPage": {
            "title": "Max Images Per Page",
            "minimum": 1,
            "maximum": 2000,
            "type": "integer",
            "description": "Maximum number of images to extract per page in 'images' and 'full' modes. Default is 200; allowed range is 1 to 2000. Set a lower value to limit output size.",
            "default": 200
          },
          "userAgent": {
            "title": "Custom User-Agent",
            "type": "string",
            "description": "Custom User-Agent header for HTTP requests. By default, uses a standard browser-like User-Agent. Set a custom value to simulate a specific bot or browser."
          },
          "includeHtml": {
            "title": "Include Raw HTML",
            "type": "boolean",
            "description": "When enabled, includes the raw HTML of each matched element in custom mode, or the full page HTML in other modes. Useful for debugging. Disabled by default to keep output compact.",
            "default": false
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration",
            "type": "object",
            "description": "Apify proxy configuration. Use residential or datacenter proxies to avoid IP blocking. Format: {\"useApifyProxy\": true, \"apifyProxyGroups\": [\"RESIDENTIAL\"]}."
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}