{
  "openapi": "3.0.1",
  "info": {
    "title": "Sitemap & URL Discovery - Find All URLs on Any Site",
    "description": "Discover every URL on any website by parsing sitemap.xml, robots.txt, and sitemap indexes. Extract URLs with last modified dates, change frequency, and priority. Perfect for SEO audits, content analysis, crawling preparation, and site mapping.",
    "version": "1.0",
    "x-build-id": "J1t1UB2tOCdMXhFNc"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/santamaria-automations~sitemap-url-discovery/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-santamaria-automations-sitemap-url-discovery",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/santamaria-automations~sitemap-url-discovery/runs": {
      "post": {
        "operationId": "runs-sync-santamaria-automations-sitemap-url-discovery",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/santamaria-automations~sitemap-url-discovery/run-sync": {
      "post": {
        "operationId": "run-sync-santamaria-automations-sitemap-url-discovery",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "websites"
        ],
        "properties": {
          "websites": {
            "title": "Websites",
            "type": "array",
            "description": "List of website URLs to analyze. You can provide bare domains (example.com) or full URLs (https://example.com). The actor will probe robots.txt and the default sitemap locations for each.",
            "items": {
              "type": "string"
            }
          },
          "followSitemapIndex": {
            "title": "Follow Sitemap Index",
            "type": "boolean",
            "description": "When a sitemap index (<sitemapindex>) is detected, recursively fetch all child sitemaps. Disable to only inspect the top-level sitemap.",
            "default": true
          },
          "respectRobotsTxt": {
            "title": "Read robots.txt",
            "type": "boolean",
            "description": "Read robots.txt and follow any Sitemap: directives found. When disabled, only the default sitemap locations (/sitemap.xml, /sitemap_index.xml, /sitemap.xml.gz) are probed.",
            "default": true
          },
          "maxUrlsPerSite": {
            "title": "Max URLs per Site",
            "minimum": 1,
            "maximum": 1000000,
            "type": "integer",
            "description": "Safety cap on URLs returned per website. Large sites (news, e-commerce) can have hundreds of thousands of URLs — increase this value if you need the full set.",
            "default": 10000
          },
          "maxDepth": {
            "title": "Max Sitemap Depth",
            "minimum": 1,
            "maximum": 10,
            "type": "integer",
            "description": "Maximum recursion depth when following nested sitemap indexes. Depth 1 = top-level index + its child sitemaps. Most sites have depth 1-2.",
            "default": 3
          },
          "includeLastmod": {
            "title": "Include Last Modified",
            "type": "boolean",
            "description": "Include the <lastmod> date when available. Useful for detecting new or recently updated content.",
            "default": true
          },
          "includeImages": {
            "title": "Include Images",
            "type": "boolean",
            "description": "Include <image:loc> entries from image sitemaps. When enabled, image URLs are emitted as extra rows with is_image=true.",
            "default": false
          },
          "timeoutSeconds": {
            "title": "Per-Sitemap Timeout (seconds)",
            "minimum": 5,
            "maximum": 300,
            "type": "integer",
            "description": "HTTP timeout applied to every sitemap, robots.txt, and child sitemap fetch.",
            "default": 30
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration",
            "type": "object",
            "description": "Optional Apify proxy settings. Most sitemaps are served without anti-bot protection, so proxy is usually not needed.",
            "default": {
              "useApifyProxy": false
            }
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "number",
                    "description": "Compute units used by the run. This is a fractional quantity, so it is typed as a number rather than an integer.",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "description": "Usage of the run broken down by resource type. Operation counters are integers; compute units and gigabyte quantities can be fractional and are typed as numbers.",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "description": "Cost of the run in USD broken down by resource type. Amounts can be fractional, so every field is typed as a number.",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "number",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}