{
  "openapi": "3.0.1",
  "info": {
    "title": "Wayback Machine CDX Bulk Extractor",
    "description": "Bulk extract archived snapshot metadata from the Wayback Machine CDX API. Get every crawled URL, timestamp, HTTP status code, MIME type, and content digest for any domain or URL pattern. Export to JSON, CSV, or Excel.",
    "version": "0.1",
    "x-build-id": "WKXcwoq64otakx5Sd"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/automation-lab~wayback-machine-cdx-extractor/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-automation-lab-wayback-machine-cdx-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/automation-lab~wayback-machine-cdx-extractor/runs": {
      "post": {
        "operationId": "runs-sync-automation-lab-wayback-machine-cdx-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/automation-lab~wayback-machine-cdx-extractor/run-sync": {
      "post": {
        "operationId": "run-sync-automation-lab-wayback-machine-cdx-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT record from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "url"
        ],
        "properties": {
          "url": {
            "title": "URL or domain",
            "type": "string",
            "description": "The URL, domain, or wildcard pattern to query. Examples: 'example.com', 'https://example.com/blog/*', '*.example.com/*'. For full domain coverage, set matchType to 'domain'."
          },
          "matchType": {
            "title": "Match type",
            "enum": [
              "exact",
              "prefix",
              "host",
              "domain"
            ],
            "type": "string",
            "description": "Controls URL matching: 'exact' matches only the exact URL, 'prefix' matches all URLs starting with the given URL, 'host' matches all URLs on the same host, 'domain' matches the host and all subdomains.",
            "default": "domain"
          },
          "maxSnapshots": {
            "title": "Max snapshots",
            "minimum": 0,
            "type": "integer",
            "description": "Maximum number of snapshots to extract. Set to 0 for unlimited (extract all available snapshots). Default: 1000.",
            "default": 1000
          },
          "fromDate": {
            "title": "From date (YYYYMMDD)",
            "pattern": "^(\\d{8}(\\d{6})?)?$",
            "type": "string",
            "description": "Start date for filtering snapshots (inclusive). Format: YYYYMMDD or YYYYMMDDHHMMSS. Leave empty for no start filter."
          },
          "toDate": {
            "title": "To date (YYYYMMDD)",
            "pattern": "^(\\d{8}(\\d{6})?)?$",
            "type": "string",
            "description": "End date for filtering snapshots (inclusive). Format: YYYYMMDD or YYYYMMDDHHMMSS. Leave empty for no end filter."
          },
          "filterStatusCodes": {
            "title": "Filter by status codes",
            "type": "array",
            "description": "Only include snapshots with these HTTP status codes. Example: [200, 301]. Leave empty to include all status codes.",
            "default": []
          },
          "excludeStatusCodes": {
            "title": "Exclude status codes",
            "type": "array",
            "description": "Exclude snapshots with these HTTP status codes. Example: [404, 500]. Applied after filterStatusCodes.",
            "default": []
          },
          "filterMimeTypes": {
            "title": "Filter by MIME types",
            "type": "array",
            "description": "Only include snapshots with these MIME types. Example: ['text/html', 'application/pdf']. Leave empty to include all MIME types.",
            "default": []
          },
          "pageSize": {
            "title": "Page size",
            "minimum": 100,
            "maximum": 150000,
            "type": "integer",
            "description": "Number of records per CDX API request. Higher values reduce API calls but may time out for dense domains. Default: 10000.",
            "default": 10000
          },
          "collapse": {
            "title": "Collapse duplicates",
            "enum": [
              "",
              "urlkey",
              "digest",
              "timestamp:4",
              "timestamp:6",
              "timestamp:8"
            ],
            "type": "string",
            "description": "Collapse consecutive records with the same value for a given field. 'urlkey' deduplicates by unique URL, 'digest' deduplicates by identical content, and 'timestamp:N' collapses on the first N digits of the timestamp (4 = year, 6 = month, 8 = day). Leave empty to get all snapshots.",
            "default": ""
          },
          "outputWaybackUrl": {
            "title": "Include Wayback Machine URL",
            "type": "boolean",
            "description": "Add a waybackUrl field with the full Wayback Machine replay URL for each snapshot (https://web.archive.org/web/{timestamp}/{url}).",
            "default": true
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}