{
  "openapi": "3.0.1",
  "info": {
    "title": "Beautiful Soup Cloud Runner",
    "description": "Beautiful Soup Cloud Runner runs Python BS4 scraping tasks on Apify. Use CSS extraction rules or custom scripts to scrape static HTML pages, follow links, use proxies, save CSV exports, trigger webhooks, and export compact datasets.",
    "version": "0.0",
    "x-build-id": "DpkGKa4OeLZ4i0c9q"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/sovanza.inc~beautiful-soup-cloud-runner/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-sovanza.inc-beautiful-soup-cloud-runner",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/sovanza.inc~beautiful-soup-cloud-runner/runs": {
      "post": {
        "operationId": "runs-sync-sovanza.inc-beautiful-soup-cloud-runner",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/sovanza.inc~beautiful-soup-cloud-runner/run-sync": {
      "post": {
        "operationId": "run-sync-sovanza.inc-beautiful-soup-cloud-runner",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "mode": {
            "title": "Run mode",
            "enum": [
              "builtin",
              "customScript"
            ],
            "type": "string",
            "description": "builtin: scrape URLs with declarative CSS extraction rules (no custom code). customScript: load and execute a user Python script with Beautiful Soup.",
            "default": "builtin"
          },
          "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "One or more URLs to fetch and parse with Beautiful Soup. Required for builtin mode; optional for customScript (passed to your script via context.start_urls).",
            "items": {
              "type": "string"
            }
          },
          "maxDepth": {
            "title": "Max crawl depth",
            "minimum": 0,
            "maximum": 10,
            "type": "integer",
            "description": "When greater than 0, follow same-origin links via the Apify request queue up to this depth (builtin mode only). 0 = scrape start URLs only.",
            "default": 0
          },
          "maxRequestsPerCrawl": {
            "title": "Max requests per crawl",
            "minimum": 1,
            "maximum": 10000,
            "type": "integer",
            "description": "Safety cap on total pages processed in one run (builtin mode with link following).",
            "default": 100
          },
          "sameOriginOnly": {
            "title": "Same-origin links only",
            "type": "boolean",
            "description": "When following links, enqueue only URLs on the same host as the seed URL.",
            "default": true
          },
          "extract": {
            "title": "Extraction rules",
            "type": "array",
            "description": "CSS-based extraction rules applied in builtin mode. Each rule extracts text, HTML, or attributes from matched elements.",
            "default": [
              {
                "name": "title",
                "selector": "title",
                "type": "text",
                "all": false
              },
              {
                "name": "h1",
                "selector": "h1",
                "type": "text",
                "all": false
              },
              {
                "name": "links",
                "selector": "a",
                "type": "attr",
                "attr": "href",
                "all": true
              }
            ]
          },
          "includeLinks": {
            "title": "Include page links",
            "type": "boolean",
            "description": "Include absolute href links found on each page in the dataset item.",
            "default": false
          },
          "includeHtml": {
            "title": "Include full HTML",
            "type": "boolean",
            "description": "If enabled, store full page HTML in each dataset item (can be large).",
            "default": false
          },
          "parser": {
            "title": "Beautiful Soup parser",
            "enum": [
              "lxml",
              "html.parser",
              "html5lib"
            ],
            "type": "string",
            "description": "Parser backend passed to BeautifulSoup(html, parser). lxml is fastest; html.parser needs no extra deps.",
            "default": "lxml"
          },
          "requestDelaySecs": {
            "title": "Request delay (seconds)",
            "minimum": 0,
            "maximum": 60,
            "type": "number",
            "description": "Minimum delay between HTTP requests (rate limiting).",
            "default": 0
          },
          "maxRetries": {
            "title": "Max retries per URL",
            "minimum": 0,
            "maximum": 10,
            "type": "integer",
            "description": "How many times to retry a URL if the HTTP request fails.",
            "default": 2
          },
          "retryDelaySecs": {
            "title": "Retry delay (seconds)",
            "minimum": 0,
            "maximum": 60,
            "type": "number",
            "description": "Sleep between retries.",
            "default": 3
          },
          "timeoutSecs": {
            "title": "Request timeout (seconds)",
            "minimum": 5,
            "maximum": 300,
            "type": "integer",
            "description": "Per-request HTTP timeout.",
            "default": 60
          },
          "saveCsvToKeyValueStore": {
            "title": "Save CSV (key-value store)",
            "type": "boolean",
            "description": "If enabled, writes a CSV summary of dataset items to the default key-value store as OUTPUT.csv.",
            "default": false
          },
          "webhookCallbackUrl": {
            "title": "Webhook callback URL",
            "type": "string",
            "description": "Optional URL to POST a JSON run summary when the Actor finishes (success or partial failure). Useful for workflow integration."
          },
          "scriptModule": {
            "title": "Script module path",
            "type": "string",
            "description": "Path to a Python module inside the Actor (e.g. scripts/example_titles_links.py). Required in customScript mode unless scriptSource is provided."
          },
          "scriptSource": {
            "title": "Inline script source (optional)",
            "type": "string",
            "description": "Optional inline Python source code. When set, overrides scriptModule. Must define the entry function. Stored as a secret input and never written to the dataset."
          },
          "entryFunction": {
            "title": "Entry function name",
            "type": "string",
            "description": "Name of the function to call in your script (default: run). Signature: run(context) or async run(context).",
            "default": "run"
          },
          "scriptArgs": {
            "title": "Script arguments",
            "type": "object",
            "description": "Arbitrary JSON passed to your script via context.script_args.",
            "default": {}
          },
          "cookies": {
            "title": "Cookie header (optional)",
            "type": "string",
            "description": "Optional raw Cookie header value for authenticated sessions. Stored as a secret input and never written to the dataset."
          },
          "headers": {
            "title": "Request headers (optional)",
            "type": "object",
            "description": "Optional extra HTTP headers (JSON object). Stored as a secret input."
          },
          "proxyConfiguration": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Apify proxy settings. Residential proxy is recommended for blocked sites.",
            "default": {
              "useApifyProxy": true,
              "apifyProxyGroups": [
                "RESIDENTIAL"
              ],
              "apifyProxyCountry": "US"
            }
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}