{
  "openapi": "3.0.1",
  "info": {
    "title": "Hugging Face Scraper — AI Models, Datasets, Spaces & Papers",
    "description": "Export every AI model, dataset, space and daily paper from the Hugging Face Hub. Filter by task, library (transformers, diffusers, GGUF), language, license or author. Sort by downloads, likes or trending score. Optionally fetch sibling file lists and README content. Uses the public Hugging Face API; no Hugging Face token is required. For AI builders, ML research, RAG and VC AI intel.",
    "version": "1.0",
    "x-build-id": "SSfe7zG0uNfXGOkCc"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/logiover~huggingface-hub-intelligence-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-logiover-huggingface-hub-intelligence-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/logiover~huggingface-hub-intelligence-scraper/runs": {
      "post": {
        "operationId": "runs-sync-logiover-huggingface-hub-intelligence-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/logiover~huggingface-hub-intelligence-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-logiover-huggingface-hub-intelligence-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "entityType": {
            "title": "Entity Type",
            "enum": [
              "models",
              "datasets",
              "spaces",
              "papers",
              "collections"
            ],
            "type": "string",
            "description": "Which entity to enumerate from the Hugging Face Hub. 'models' = ML models (~1M+), 'datasets' = training/evaluation datasets (~200k+), 'spaces' = hosted demo apps, 'papers' = daily curated research papers, 'collections' = curated lists of models/datasets/spaces.",
            "default": "models"
          },
          "search": {
            "title": "Search Query",
            "type": "string",
            "description": "Free-text search over name + description. Examples: 'llama', 'whisper', 'mistral', 'phi', 'instruct'. Leave empty to enumerate all entities matching filters.",
            "default": ""
          },
          "author": {
            "title": "Author / Organization Filter",
            "type": "string",
            "description": "Restrict to a single author or organization (e.g. 'mistralai', 'meta-llama', 'openai-community', 'stabilityai'). Substring match.",
            "default": ""
          },
          "pipelineTag": {
            "title": "Task / Pipeline Tag (models only)",
            "type": "string",
            "description": "Filter models by primary task. Common values: 'text-generation', 'text-classification', 'token-classification', 'feature-extraction', 'sentence-similarity', 'fill-mask', 'translation', 'summarization', 'question-answering', 'image-to-text', 'text-to-image', 'image-classification', 'object-detection', 'audio-classification', 'automatic-speech-recognition' (ASR), 'text-to-speech' (TTS), 'reinforcement-learning'.",
            "default": ""
          },
          "library": {
            "title": "Library Filter (models / datasets)",
            "type": "string",
            "description": "Filter by library. Common: 'transformers', 'diffusers', 'sentence-transformers', 'gguf', 'mlx', 'onnx', 'safetensors', 'pytorch', 'jax', 'tensorflow', 'datasets'.",
            "default": ""
          },
          "language": {
            "title": "Language Tag (models / datasets)",
            "type": "string",
            "description": "Filter by primary language. ISO 639-1 codes ('en', 'fr', 'de', 'tr', 'zh', 'ja', 'es') or special: 'multilingual'.",
            "default": ""
          },
          "tags": {
            "title": "Additional Tags Filter",
            "type": "array",
            "description": "Restrict to items whose tag list contains all of these. Examples: ['safetensors','region:us'], ['license:apache-2.0','llama'], ['gated:false']. Hugging Face supports complex tag filters at the API level.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "sort": {
            "title": "Sort By",
            "enum": [
              "downloads",
              "likes",
              "lastModified",
              "createdAt",
              "trendingScore"
            ],
            "type": "string",
            "description": "Field to sort by. 'downloads' = most downloaded, 'likes' = community favorites, 'lastModified' = recently updated, 'createdAt' = newest creations, 'trendingScore' = HF's trending algorithm.",
            "default": "downloads"
          },
          "sortDirection": {
            "title": "Sort Direction",
            "enum": [
              "-1",
              "1"
            ],
            "type": "string",
            "description": "Sort direction: '-1' = descending (highest first), '1' = ascending.",
            "default": "-1"
          },
          "maxResults": {
            "title": "Maximum Results",
            "minimum": 1,
            "maximum": 1000000,
            "type": "integer",
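            "description": "Hard cap on records returned; the actor auto-paginates until this cap is reached. This schema only accepts values from 1 to 1000000, so 0 (unlimited) is rejected by validation; use the maximum to fetch as much as possible. Be aware: the full models catalog is ~1M+ items.",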
            "default": 500
          },
          "fetchDetails": {
            "title": "Fetch Full Details Per Item",
            "type": "boolean",
            "description": "When enabled, for every model / dataset / space / paper, the actor makes an additional call to `/api/{type}/{id}` to fetch richer fields: full README content, sibling file list, model card data, dataset card data, config files, gated status, license, citation. Adds 1 HTTP request per item.",
            "default": false
          },
          "fetchReadme": {
            "title": "Fetch README Content",
            "type": "boolean",
            "description": "When enabled (requires 'fetchDetails', i.e. 'Fetch Full Details Per Item', to be true), the actor also pulls the raw model/dataset card (README.md) for each item. Useful for AI training datasets, model documentation indexing, and RAG over the Hub.",
            "default": false
          },
          "minDownloads": {
            "title": "Minimum Downloads (client-side filter)",
            "minimum": 0,
            "type": "integer",
            "description": "Drop entities with fewer than this many downloads. Useful for filtering out abandoned or experimental items.",
            "default": 0
          },
          "minLikes": {
            "title": "Minimum Likes (client-side filter)",
            "minimum": 0,
            "type": "integer",
            "description": "Drop entities with fewer than this many likes.",
            "default": 0
          },
          "modifiedFrom": {
            "title": "Modified From (ISO date)",
            "type": "string",
            "description": "Drop items last-modified before this date (YYYY-MM-DD). Useful for tracking what's new this week / month.",
            "default": null
          },
          "papersStartDate": {
            "title": "Daily Papers — Start Date",
            "type": "string",
            "description": "For entityType='papers' only. Pull papers from the daily-papers archive starting at this date (YYYY-MM-DD). Defaults to last 30 days.",
            "default": null
          },
          "papersEndDate": {
            "title": "Daily Papers — End Date",
            "type": "string",
            "description": "For entityType='papers' only. Pull papers up to this date (YYYY-MM-DD). Defaults to today.",
            "default": null
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}