{
  "openapi": "3.0.1",
  "info": {
    "title": "Sitemap Analyzer — Recursive Parse, Health Check, AI Tags",
    "description": "Parse any sitemap.xml recursively, extract all URLs with metadata, check HTTP health status, and optionally cluster URLs by topic using Claude AI. Perfect for SEO audits and site migration.",
    "version": "0.0",
    "x-build-id": "DmTIISfClLmwH6K4J"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/elder_contrabasson~sitemap-analyzer/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-elder_contrabasson-sitemap-analyzer",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/elder_contrabasson~sitemap-analyzer/runs": {
      "post": {
        "operationId": "runs-sync-elder_contrabasson-sitemap-analyzer",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/elder_contrabasson~sitemap-analyzer/run-sync": {
      "post": {
        "operationId": "run-sync-elder_contrabasson-sitemap-analyzer",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "siteUrl"
        ],
        "properties": {
          "siteUrl": {
            "title": "Site URL, subfolder URL, or direct sitemap URL",
            "type": "string",
            "description": "Accepts three kinds of input: (a) a root domain - robots.txt and common paths are tried automatically; (b) a subfolder URL like example.com/blog - the subfolder sitemap is resolved, filtering robots.txt directives by the subpath; (c) a direct sitemap URL (.xml, .xml.gz, or sitemap-index).",
            "default": "https://www.apify.com"
          },
          "sitemapUrl": {
            "title": "Explicit sitemap URL (optional, overrides auto-detection)",
            "type": "string",
            "description": "If set, this exact sitemap URL is used and all auto-detection is skipped. Most reliable when you already know the sitemap address. Accepts .xml, .xml.gz, and sitemap-index files."
          },
          "maxUrls": {
            "title": "Max URLs to process",
            "minimum": 0,
            "maximum": 100000,
            "type": "integer",
            "description": "Hard cap on URLs extracted (URL mode). Crawl stops when reached. Set to 0 for unlimited.",
            "default": 1000
          },
          "maxSitemaps": {
            "title": "Max child sitemaps to process",
            "minimum": 0,
            "maximum": 10000,
            "type": "integer",
            "description": "Hard cap on the number of child sitemaps walked (both modes). Keeps runs predictable on huge sitemap-indexes. 0 = unlimited.",
            "default": 0
          },
          "maxDepth": {
            "title": "Max sitemap-index nesting depth",
            "minimum": 1,
            "maximum": 12,
            "type": "integer",
            "description": "How deep to follow nested sitemap-index files. Depth 0 = the entry sitemap itself.",
            "default": 6
          },
          "requestDelayMs": {
            "title": "Delay between sitemap requests (ms)",
            "minimum": 0,
            "maximum": 5000,
            "type": "integer",
            "description": "Pause between consecutive sitemap fetches. Higher values reduce anti-bot soft-bans on strict sites (800-1500ms recommended for sites with anti-bot protection). 0 = no delay (fastest).",
            "default": 300
          },
          "enableHealthCheck": {
            "title": "Check HTTP status of each URL",
            "type": "boolean",
            "description": "Send HEAD request to each URL to verify it returns 200. Adds ~1 sec per URL.",
            "default": false
          },
          "healthCheckConcurrency": {
            "title": "Concurrent health check requests",
            "minimum": 1,
            "maximum": 50,
            "type": "integer",
            "description": "How many URLs to check in parallel. Higher = faster but more aggressive on target site.",
            "default": 10
          },
          "enableAiClustering": {
            "title": "Cluster URLs by topic using AI (optional)",
            "type": "boolean",
            "description": "Group URLs into topic clusters using Claude (e.g. product pages, blog posts, support docs). Requires Anthropic API key.",
            "default": false
          },
          "anthropicApiKey": {
            "title": "Anthropic API key (only if AI clustering enabled)",
            "type": "string",
            "description": "Your Anthropic API key (sk-ant-...). BYOK - we don't store it. Get one at console.anthropic.com"
          },
          "respectRobotsTxt": {
            "title": "Respect robots.txt",
            "type": "boolean",
            "description": "Check robots.txt before scraping and skip URLs that are disallowed.",
            "default": true
          },
          "sitemapTreeMode": {
            "title": "Sitemap tree mode (analyze child sitemaps instead of URLs)",
            "type": "boolean",
            "description": "Instead of extracting URLs, list every child sitemap inside the index: its URL, how many URLs it contains, depth, and parent. Useful for competitor scale analysis.",
            "default": false
          },
          "sampleUrlsPerSitemap": {
            "title": "Sample URLs per child sitemap (tree mode only)",
            "minimum": 0,
            "maximum": 100,
            "type": "integer",
            "description": "In tree mode, also pull this many sample URLs from each child sitemap (0 = none, just counts).",
            "default": 0
          },
          "useProxy": {
            "title": "Use Apify Proxy (datacenter)",
            "type": "boolean",
            "description": "Route requests through Apify datacenter proxy + rotate User-Agent. Helps against rate-limiting and IP bans. Does NOT bypass Cloudflare/DataDome JS-challenges.",
            "default": false
          },
          "proxyType": {
            "title": "Proxy type (only if proxy enabled)",
            "enum": [
              "datacenter",
              "residential"
            ],
            "type": "string",
            "description": "Datacenter — fast and cheap, good vs rate-limits. Residential — slower and more expensive, but bypasses many anti-bot systems. Residential requires the RESIDENTIAL proxy group to be available in your Apify account.",
            "default": "datacenter"
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}