{
  "openapi": "3.0.1",
  "info": {
    "title": "Smart Page Fetcher — HTML, Markdown & Text",
    "description": "Fetch a batch of URLs and get the page as HTML, Markdown, or clean text. Tries plain HTTP first, renders JavaScript in a real browser when needed, and escalates to stealth + residential proxy for Cloudflare-protected, bot-defended pages, per URL. Pay only for the difficulty each URL needed.",
    "version": "0.0",
    "x-build-id": "x8ULqg8nGpgJVZSiV"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/shelvick~smart-page-fetcher/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-shelvick-smart-page-fetcher",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/shelvick~smart-page-fetcher/runs": {
      "post": {
        "operationId": "runs-sync-shelvick-smart-page-fetcher",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/shelvick~smart-page-fetcher/run-sync": {
      "post": {
        "operationId": "run-sync-shelvick-smart-page-fetcher",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "urls"
        ],
        "properties": {
          "urls": {
            "title": "URLs",
            "minItems": 1,
            "maxItems": 500,
            "type": "array",
            "description": "List of URLs to fetch, 1-500 per batch. Each entry is a string URL or an object `{\"url\": \"...\", \"headers\": {...}}` to set per-URL request headers. Allowed header names: Accept, Accept-Language, Accept-Encoding, User-Agent, Referer, Content-Type. Anything else (Cookie, Authorization, X-*) is rejected at input validation -- the Actor is a general-purpose unauthenticated fetcher. URLs are processed as a batch with cheapest-tier-first escalation."
          },
          "basic": {
            "title": "Basic HTTP tier",
            "enum": [
              "auto",
              "true",
              "false"
            ],
            "type": "string",
            "description": "Controls the plain-HTTP tier. 'auto' lets it participate in the escalation chain (lowest cost first). 'false' skips it (use when callers know JS is required). 'true' forces the chain to start here (the default behavior in most cases).",
            "default": "auto"
          },
          "js": {
            "title": "JavaScript render tier",
            "enum": [
              "auto",
              "true",
              "false"
            ],
            "type": "string",
            "description": "Controls the JavaScript-render tier (real browser, no stealth, no proxy). 'auto' uses it after basic fails. 'true' starts the chain here when caller knows JS is needed. 'false' caps cost at basic (or jumps straight to stealth if basic fails too).",
            "default": "auto"
          },
          "stealth": {
            "title": "Stealth + proxy tier",
            "enum": [
              "auto",
              "true",
              "false"
            ],
            "type": "string",
            "description": "Controls the stealth tier (anti-bot bypass via rotating residential proxy). 'auto' uses it as the last fallback. 'true' starts the chain here for known-defended targets (typical signals: 403/429 responses, Cloudflare challenges, JS-based bot detection) -- saves the cost of two failed lower-tier attempts. 'false' caps the per-URL price at the JS-render tier (URLs that need stealth come back as `failed` records, zero charge).",
            "default": "auto"
          },
          "outputs": {
            "title": "Output formats",
            "type": "array",
            "description": "Output formats per URL — all derived from one fetch, no extra charge per format. Available: html (raw, via KVS URL), cleaned_html (scripts/tracking stripped, structure kept), text (boilerplate-stripped, LLM-friendly), markdown, links, media, headings, tables, json_ld, og, meta. The a11y tree and full-page screenshot need the JS or stealth tier (returned as KVS URLs). Unknown names fail validation; see the README for full field shapes.",
            "items": {
              "type": "string"
            },
            "default": [
              "html",
              "markdown"
            ]
          },
          "runtime_budget_ms": {
            "title": "Total runtime budget (ms)",
            "minimum": 30000,
            "maximum": 3600000,
            "type": "integer",
            "description": "Total wall-clock budget for the whole batch. When exhausted, unprocessed URLs come back as 'deferred' records (zero charge) and the run ends cleanly. Default 270000 (4m30s) keeps synchronous callers under Apify's 5-minute sync API timeout with headroom. Raise up to 3600000 (60m) for large async batches: a 1000-URL batch at typical mixed tiers plans 15-20 min wall-clock, so 1200000 (20m) is a reasonable target.",
            "default": 270000
          },
          "country": {
            "title": "Proxy geo (stealth tier only)",
            "pattern": "^[A-Z]{2}$",
            "type": "string",
            "description": "Optional ISO-3166-1 alpha-2 country code (e.g. 'US', 'GB', 'DE'). Forwarded to the stealth tier's residential proxy for geo-targeting. Ignored by basic and JS tiers."
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}