{
  "openapi": "3.0.1",
  "info": {
    "title": "Website Job Extractor (HTTP)",
    "description": "Scrape job listings directly from company websites / career pages / ATS systems. Unlike job portals, scraping at the source lets you identify hiring intent the moment it happens. The freshest signal for B2B targeting. AI-extracted with automatic ATS detection and anti-hallucination validation.",
    "version": "2.0",
    "x-build-id": "0QbO8SaLkhn9oLDGX"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/santamaria-automations~website-job-extractor/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-santamaria-automations-website-job-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/santamaria-automations~website-job-extractor/runs": {
      "post": {
        "operationId": "runs-sync-santamaria-automations-website-job-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/santamaria-automations~website-job-extractor/run-sync": {
      "post": {
        "operationId": "run-sync-santamaria-automations-website-job-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "companies"
        ],
        "properties": {
          "companies": {
            "title": "Companies",
            "type": "array",
            "description": "List of companies to extract jobs from. Provide either a website_url (auto-discovers career pages) or specific career_urls.",
            "items": {
              "type": "object",
              "properties": {
                "company_id": {
                  "title": "Company ID",
                  "type": "string",
                  "description": "Your internal company identifier (UUID or any string)"
                },
                "company_name": {
                  "title": "Company Name",
                  "type": "string",
                  "description": "Name of the company (used for context in AI extraction)"
                },
                "website_url": {
                  "title": "Website URL",
                  "type": "string",
                  "description": "Company website URL. The actor will auto-discover career pages from the homepage navigation. Use this when you don't have specific career page URLs."
                },
                "career_urls": {
                  "title": "Career URLs",
                  "type": "array",
                  "description": "List of specific career page URLs to extract jobs from. Optional if website_url is provided.",
                  "items": {
                    "type": "string"
                  },
                  "default": []
                },
                "career_page_url": {
                  "title": "Career Page URL",
                  "type": "string",
                  "description": "Single pre-discovered career page URL (added as priority URL before career_urls)"
                },
                "website_domain": {
                  "title": "Website Domain",
                  "type": "string",
                  "description": "Website domain for cross-subdomain career discovery (e.g., tertianum.ch). Tries common career subdomains like jobs.domain.ch, karriere.domain.ch etc."
                }
              },
              "required": [
                "company_id",
                "company_name"
              ]
            }
          },
          "outputLanguage": {
            "title": "Output Language",
            "enum": [
              "en",
              "de",
              "fr",
              "it",
              "es",
              "pt",
              "nl",
              "auto"
            ],
            "type": "string",
            "description": "Language for text output fields (description, department, employment_type). 'auto' (the default) matches the website's language, i.e. keeps the original language.",
            "default": "auto"
          },
          "llmProvider": {
            "title": "LLM Provider",
            "enum": [
              "gemini",
              "groq",
              "openrouter"
            ],
            "type": "string",
            "description": "Primary AI provider for job extraction. Gemini Flash is free and recommended.",
            "default": "gemini"
          },
          "fallbackProvider": {
            "title": "Fallback LLM Provider",
            "enum": [
              "gemini",
              "groq",
              "openrouter"
            ],
            "type": "string",
            "description": "First fallback provider (second level), used if the primary provider fails (e.g., rate limit exceeded)."
          },
          "fallback2Provider": {
            "title": "Third-Level Fallback",
            "enum": [
              "gemini",
              "groq",
              "openrouter"
            ],
            "type": "string",
            "description": "Third-level fallback if both primary and first fallback fail."
          },
          "llmModel": {
            "title": "LLM Model",
            "type": "string",
            "description": "Override model (default: gemini-2.0-flash / llama-3.1-8b-instant)"
          },
          "llmApiKey": {
            "title": "LLM API Key (Groq/OpenRouter)",
            "type": "string",
            "description": "API key for Groq or OpenRouter. Not needed for Gemini if GEMINI_API_KEY env var is set."
          },
          "geminiApiKey": {
            "title": "Gemini API Key",
            "type": "string",
            "description": "API key for Google Gemini (free tier: 1M tokens/minute). Get one at https://aistudio.google.com/apikey"
          },
          "groqApiKey": {
            "title": "Groq API Key",
            "type": "string",
            "description": "API key for Groq (fast inference, free tier available). Get one at https://console.groq.com/keys"
          },
          "openrouterApiKey": {
            "title": "OpenRouter API Key",
            "type": "string",
            "description": "API key for OpenRouter (many free models available). Get one at https://openrouter.ai/keys"
          },
          "maxPagesPerCompany": {
            "title": "Max Pages Per Company",
            "minimum": 1,
            "maximum": 20,
            "type": "integer",
            "description": "Maximum number of career pages to process per company (includes sub-pages and pagination)",
            "default": 5
          },
          "maxConcurrency": {
            "title": "Max Concurrency",
            "minimum": 1,
            "maximum": 10,
            "type": "integer",
            "description": "Maximum number of concurrent HTTP requests",
            "default": 3
          },
          "jobKeywords": {
            "title": "Job Keywords Filter",
            "type": "array",
            "description": "Only extract jobs matching these keywords. When set, each job gets a relevance score (high/medium/low). Leave empty to extract all jobs.",
            "items": {
              "type": "string"
            }
          },
          "webhookUrl": {
            "title": "Webhook URL",
            "type": "string",
            "description": "URL to POST results to when extraction completes (e.g., your API endpoint)"
          },
          "skipAtsExtraction": {
            "title": "Skip LLM for Known ATS Platforms",
            "type": "boolean",
            "description": "When a supported ATS platform is detected (Lever, Greenhouse, Workday, Ashby, etc.), skip LLM extraction and output the ATS URL instead. Feed these URLs into the Career Site Jobs Scraper for faster, cheaper, structured extraction without hallucination risk. Supports 16 ATS platforms.",
            "default": false
          },
          "enablePlaywrightFallback": {
            "title": "Enable Playwright Fallback",
            "type": "boolean",
            "description": "Automatically run browser-based extraction for companies flagged with JS rendering issues. Starts a separate actor run (additional cost).",
            "default": false
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration",
            "type": "object",
            "description": "Proxy settings. Datacenter proxies work for most career pages. Residential may be needed for heavily-protected sites."
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}