{
  "openapi": "3.0.1",
  "info": {
    "title": "Website Contact Extractor - Emails & Phones",
    "description": "Website contact extractor and contact details extractor for B2B lead lists and CRM enrichment. Extract emails, phones, social profiles, addresses, contact pages, tech signals, email verification, phone validation, and role labels.",
    "version": "1.9",
    "x-build-id": "B4X5XvR3jEDicCxcJ"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/harvestlab~contact-extractor/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-harvestlab-contact-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/harvestlab~contact-extractor/runs": {
      "post": {
        "operationId": "runs-sync-harvestlab-contact-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/harvestlab~contact-extractor/run-sync": {
      "post": {
        "operationId": "run-sync-harvestlab-contact-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "urls": {
            "title": "Website URLs",
            "type": "array",
            "description": "List of full website URLs to analyze (e.g. https://example.com). You can provide either URLs or domains.",
            "items": {
              "type": "string"
            }
          },
          "startUrls": {
            "title": "Start URLs (alias for Website URLs)",
            "type": "array",
            "description": "Alternative field name for Website URLs, provided for compatibility with Apify's common 'startUrls' convention (e.g. https://example.com). The canonical field is `urls` above - prefer that. If both are supplied, they are merged.",
            "items": {
              "type": "string"
            }
          },
          "url": {
            "title": "URL (CLI alias)",
            "type": "string",
            "description": "CLI alias for a single website URL. Hidden from the Console form."
          },
          "website": {
            "title": "Website (CLI alias)",
            "type": "string",
            "description": "CLI alias for a single website URL. Hidden from the Console form."
          },
          "domains": {
            "title": "Domains",
            "type": "array",
            "description": "List of domains without protocol (e.g. example.com). HTTPS will be added automatically. Alternative to providing full URLs - use this OR Website URLs above.",
            "items": {
              "type": "string"
            }
          },
          "maxWebsites": {
            "title": "Max Websites To Process",
            "minimum": 1,
            "maximum": 200,
            "type": "integer",
            "description": "Bill-safety cap across Website URLs, Start URLs, URL aliases, and Domains after deduplication. Default 50; raise deliberately for larger lead batches.",
            "default": 50
          },
          "maxPagesPerSite": {
            "title": "Max Pages Per Site",
            "minimum": 1,
            "maximum": 20,
            "type": "integer",
            "description": "Maximum number of pages to crawl per website. Higher values find more contacts but take longer.",
            "default": 5
          },
          "includeSubpages": {
            "title": "Include Subpages",
            "type": "boolean",
            "description": "Automatically crawl /contact, /about, /imprint, and similar pages to find additional contact information.",
            "default": true
          },
          "detectTechStack": {
            "title": "Detect Tech Stack",
            "type": "boolean",
            "description": "Analyze the website to detect technologies used (CMS, frameworks, analytics, etc.).",
            "default": true
          },
          "verifyEmails": {
            "title": "Verify Email Deliverability (MX-record check)",
            "type": "boolean",
            "description": "Look up real MX records for every extracted email, detect common-domain typos (e.g. gmial.com -> gmail.com), flag free/disposable inboxes, and tag each email HIGH / MEDIUM / LOW / UNKNOWN deliverability. Adds a new `emails_verified` field to every output item. Cost: $0.01 per email whose domain passes MX resolution (uncertain / disposable / no-MX emails are free).",
            "default": false
          },
          "maxEmailsToVerify": {
            "title": "Max Emails To Verify Per Run",
            "minimum": 1,
            "maximum": 500,
            "type": "integer",
            "description": "Bill-safety cap for MX and SMTP email verification across the whole run. Raw extracted emails are still returned; only verification is capped. Default 50, maximum 500.",
            "default": 50
          },
          "deepEmailVerification": {
            "title": "Deep SMTP Probe - Real Mailbox Verification (v1.7)",
            "type": "boolean",
            "description": "Opt-in: run an async SMTP RCPT TO probe against the primary MX host for every MX-cleared email. Classifies each email as deliverable / undeliverable / catchall / greylisted / port_blocked. Reputation-safe: per-MX-host probe history persists across runs (named KV store), capped at 10 probes/host/day with 10s cooldown between probes to the same host. Port 25 is frequently blocked on Apify datacenter egress; when it is, every entry returns `port_blocked` (free). Cost: $0.02 per email that receives a definitive verdict (deliverable or undeliverable) - catch-all, greylisted, and non-verdict results are free. Requires `verifyEmails: true`.",
            "default": false
          },
          "enableAiAnalysis": {
            "title": "Enable AI Contact Enrichment",
            "type": "boolean",
            "description": "Use an LLM to classify each email by role (sales/support/hr/legal/executive/general/personal), group near-duplicate team addresses, flag non-monitored no-reply inboxes, and pick the single best primary contact for B2B outreach. When `verifyEmails` is also enabled, the AI picks the primary contact only from HIGH/MEDIUM deliverability emails. Requires an API key for your chosen LLM provider. Cost: $0.05 per website analyzed.",
            "default": false
          },
          "llmProvider": {
            "title": "LLM Provider",
            "enum": [
              "openrouter",
              "anthropic",
              "google",
              "openai",
              "ollama"
            ],
            "type": "string",
            "description": "AI backend for email role classification, deduplication, and primary-contact selection. 'OpenRouter' (default) is cheapest - Gemini Flash via OpenRouter is ~$0.001 per website. 'Anthropic' = Claude, 'Google AI' = Gemini direct, 'OpenAI' = GPT-4o mini, 'Ollama' = self-hosted (no API cost). Each hosted provider needs its own API key field below; Ollama is configured with the Ollama Base URL field instead.",
            "default": "openrouter"
          },
          "llmModel": {
            "title": "LLM Model",
            "type": "string",
            "description": "Specific model to use. Leave empty for the provider default (google/gemini-2.0-flash-001 for OpenRouter, claude-sonnet-4-20250514 for Anthropic, gemini-2.0-flash for Google AI, gpt-4o-mini for OpenAI, llama3.1 for Ollama)."
          },
          "openrouterApiKey": {
            "title": "OpenRouter API Key",
            "type": "string",
            "description": "Your OpenRouter API key. Get one at openrouter.ai/keys"
          },
          "anthropicApiKey": {
            "title": "Anthropic API Key",
            "type": "string",
            "description": "Your Anthropic API key. Get one at console.anthropic.com"
          },
          "googleApiKey": {
            "title": "Google AI API Key",
            "type": "string",
            "description": "API key for Google AI (Gemini). Get one at aistudio.google.com/app/apikey"
          },
          "openaiApiKey": {
            "title": "OpenAI API Key",
            "type": "string",
            "description": "API key from platform.openai.com (required if using the OpenAI provider)"
          },
          "ollamaBaseUrl": {
            "title": "Ollama Base URL",
            "type": "string",
            "description": "Base URL for the Ollama API. Default: http://localhost:11434"
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration",
            "type": "object",
            "description": "Proxy settings. Residential proxy is the default and strongly recommended for reliable results because many websites block datacenter IPs."
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}