{
  "openapi": "3.0.1",
  "info": {
    "title": "Pastebin Keyword Search & OSINT Scraper",
    "description": "Search public Pastebin archive data by keyword or regex, auto-expand into syntax archives, and stream matching OSINT results or a no-match search summary.",
    "version": "0.10",
    "x-build-id": "sjrBRCAM6zVlN6Vm9"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/thescrapelab~pastebin-osint-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-thescrapelab-pastebin-osint-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/thescrapelab~pastebin-osint-scraper/runs": {
      "post": {
        "operationId": "runs-sync-thescrapelab-pastebin-osint-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/thescrapelab~pastebin-osint-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-thescrapelab-pastebin-osint-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "runMode": {
            "title": "Run mode",
            "enum": [
              "scrape",
              "url_index"
            ],
            "type": "string",
            "description": "Choose whether the actor should scrape paste contents or only collect lightweight Pastebin URLs for a cumulative scheduled index. URL index mode does not fetch raw paste text.",
            "default": "scrape"
          },
          "maxPastesPerRun": {
            "title": "Maximum archive entries to inspect",
            "minimum": 1,
            "maximum": 5000,
            "type": "integer",
            "description": "Upper limit for Pastebin archive and optional search-engine entries inspected before filters and result limits are applied. The actor can inspect up to 5,000 entries per run across the selected discovery sources, but public sources may expose fewer entries.",
            "default": 25
          },
          "maxResults": {
            "title": "Maximum dataset items to save",
            "minimum": 1,
            "maximum": 500,
            "type": "integer",
            "description": "Upper limit for matching paste records saved in scrape mode. URL index mode uses urlIndexMaxRecordsPerRun instead.",
            "default": 3
          },
          "urlIndexMaxRecordsPerRun": {
            "title": "URL records per index run",
            "minimum": 1,
            "maximum": 5000,
            "type": "integer",
            "description": "Maximum new paste_url records to save in URL index mode. Use this with Apify Schedules to build a cumulative low-storage URL history over time.",
            "default": 250
          },
          "urlIndexDatasetName": {
            "title": "URL index dataset name",
            "type": "string",
            "description": "Named Apify dataset where URL index mode appends cumulative paste_url records across scheduled runs.",
            "default": "pastebin-url-index"
          },
          "urlIndexStateStoreName": {
            "title": "URL index state store name",
            "type": "string",
            "description": "Named key-value store used to remember recently indexed paste IDs and avoid saving duplicate URL records across scheduled runs.",
            "default": "pastebin-url-index-state"
          },
          "urlIndexDeduplicate": {
            "title": "Skip already indexed URLs",
            "type": "boolean",
            "description": "When enabled, URL index mode uses the named state store to avoid appending paste IDs that were already collected by previous scheduled runs.",
            "default": true
          },
          "urlIndexRecentIdLimit": {
            "title": "Remember recent URL IDs",
            "minimum": 1,
            "maximum": 1000000,
            "type": "integer",
            "description": "How many recently indexed paste IDs to keep in the dedupe state store. Higher values reduce duplicates across long schedules but slightly increase key-value storage.",
            "default": 200000
          },
          "urlIndexSaveToDefaultDataset": {
            "title": "Also save URL records to run dataset",
            "type": "boolean",
            "description": "When enabled, URL index mode writes each new paste_url record to both the cumulative named dataset and the current run's default dataset for easier run-by-run inspection.",
            "default": true
          },
          "useUrlIndex": {
            "title": "Search collected URL index",
            "type": "boolean",
            "description": "In normal scrape mode, search previously collected paste_url records from the named URL index dataset only after the live archive search finds no keyword matches. This lets client runs go further back while avoiding extra raw paste fetches when live data already matched.",
            "default": true
          },
          "maxUrlIndexEntriesToSearch": {
            "title": "Indexed URLs to search",
            "minimum": 0,
            "maximum": 5000,
            "type": "integer",
            "description": "Maximum recently collected URL index records to search as a fallback after live keyword search finds no matches. Higher values go further back, but increase raw paste fetches and runtime.",
            "default": 100
          },
          "fetchDetailMetadata": {
            "title": "Fetch author and date",
            "type": "boolean",
            "description": "When enabled, the actor makes one extra Pastebin page request for each saved item to enrich it with author and publication timestamp. Leave this off for the cheapest high-volume runs.",
            "default": false
          },
          "keywords": {
            "title": "Keywords",
            "maxItems": 100,
            "uniqueItems": true,
            "type": "array",
            "description": "Optional words or phrases to search for in discovered public paste text. If you enter keywords, the actor saves only pastes that contain at least one of them. Up to 100 keywords are accepted.",
            "items": {
              "type": "string",
              "maxLength": 256
            },
            "default": []
          },
          "discoveryMode": {
            "title": "Discovery depth",
            "enum": [
              "recent",
              "expanded",
              "deep"
            ],
            "type": "string",
            "description": "How far to look for public paste IDs before keyword filtering. Recent checks only the rolling public archive. Expanded adds common syntax archives when you request more than the main archive usually exposes. Deep fetches Pastebin's public language archive list and scans up to the configured syntax archive limit. Deep is broader and slower, but it is still not a complete historical Pastebin search.",
            "default": "expanded"
          },
          "syntaxArchives": {
            "title": "Syntax archives",
            "maxItems": 25,
            "uniqueItems": true,
            "type": "array",
            "description": "Optional Pastebin syntax archives to include in discovery. These pages can expose additional and sometimes older public paste IDs than the main rolling archive. Choose suggested values or type another Pastebin syntax slug.",
            "items": {
              "type": "string",
              "enumSuggestedValues": [
                "bash",
                "python",
                "javascript",
                "json",
                "xml",
                "sql",
                "yaml",
                "php",
                "java",
                "cpp",
                "c",
                "csharp",
                "go",
                "rust",
                "powershell",
                "text"
              ],
              "enumTitles": [
                "Bash",
                "Python",
                "JavaScript",
                "JSON",
                "XML",
                "SQL",
                "YAML",
                "PHP",
                "Java",
                "C++",
                "C",
                "C#",
                "Go",
                "Rust",
                "PowerShell",
                "Text"
              ]
            },
            "default": []
          },
          "maxSyntaxArchivesToScan": {
            "title": "Maximum syntax archives to scan",
            "minimum": 1,
            "maximum": 266,
            "type": "integer",
            "description": "Only used for deep discovery. Pastebin currently exposes hundreds of language archive pages, so this cap controls how many of those pages are fetched before paste IDs are deduplicated and selected for processing.",
            "default": 75
          },
          "autoExpandSyntaxArchives": {
            "title": "Automatically search more archive pages",
            "type": "boolean",
            "description": "Backwards-compatible switch for automatic archive expansion. Leave enabled for expanded and deep discovery. Turn it off only when you want the actor to inspect the main rolling archive unless syntaxArchives are provided manually.",
            "default": true
          },
          "saveNoMatchSummary": {
            "title": "Save a no-match summary result",
            "type": "boolean",
            "description": "When enabled, keyword runs that inspect public pastes but find no matching paste save one search_summary dataset item explaining what was searched and how to widen the run.",
            "default": true
          },
          "searchEngineDiscovery": {
            "title": "Add search-engine discovery",
            "type": "boolean",
            "description": "Optional best-effort fallback that searches externally indexed Pastebin URLs for each keyword and adds discovered paste IDs to the run. This can find older public pastes, but search engines may throttle, omit, or change results.",
            "default": false
          },
          "maxSearchResultsPerKeyword": {
            "title": "Search results per keyword",
            "minimum": 1,
            "maximum": 50,
            "type": "integer",
            "description": "Maximum externally indexed Pastebin URLs to add per keyword when searchEngineDiscovery is enabled.",
            "default": 20
          },
          "noMatchStopAfterPastes": {
            "title": "Stop if no matches after",
            "minimum": 0,
            "maximum": 5000,
            "type": "integer",
            "description": "Cost guard for keyword searches. If zero matching paste records have been saved after this many processed pastes, the actor stops early and writes a summary. Set to 0 to disable and scan the full maxPastesPerRun budget.",
            "default": 1000
          },
          "stopAfterPastesWithoutNewMatch": {
            "title": "Stop after no new matches",
            "minimum": 0,
            "maximum": 5000,
            "type": "integer",
            "description": "Cost guard for sparse keyword searches. After the first matching paste is saved, the actor stops if this many additional processed pastes do not produce another saved match. Set to 0 to disable and scan the full maxPastesPerRun budget.",
            "default": 1000
          },
          "regexPatterns": {
            "title": "Regex patterns",
            "maxItems": 25,
            "uniqueItems": true,
            "type": "array",
            "description": "Optional Python-compatible regex patterns used to extract emails, keys, URLs, or other structured matches from saved pastes. Up to 25 patterns are accepted.",
            "items": {
              "type": "string",
              "maxLength": 1000
            },
            "default": []
          },
          "maxConcurrency": {
            "title": "Maximum parallel paste fetches",
            "minimum": 1,
            "maximum": 8,
            "type": "integer",
            "description": "How many paste raw-text requests can run at the same time. Lower values reduce memory and blocking risk; higher values can finish faster when responses are small.",
            "default": 4
          },
          "requestTimeoutSecs": {
            "title": "Request timeout",
            "minimum": 5,
            "maximum": 120,
            "type": "integer",
            "description": "Maximum time to wait for one Pastebin/archive/proxy request before retrying. Use lower values for faster failover and higher values only when proxy responses are slow.",
            "default": 20
          },
          "maxRawTextBytes": {
            "title": "Maximum raw paste size",
            "minimum": 10000,
            "maximum": 5000000,
            "type": "integer",
            "description": "Largest raw paste response the actor will hold in memory. Larger pastes are skipped and counted in RUN_SUMMARY as oversized_pastes.",
            "default": 1000000
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}