{
  "openapi": "3.0.1",
  "info": {
    "title": "Hugging Face Scraper - Models, Datasets, Papers",
    "description": "Hugging Face data export tool: scrape models, datasets & daily papers without a token. Export to CSV/JSON. A no-login Hugging Face API alternative.",
    "version": "1.0",
    "x-build-id": "FlqgBuMipun9Bv9nL"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/logiover~huggingface-hub-intelligence-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-logiover-huggingface-hub-intelligence-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/logiover~huggingface-hub-intelligence-scraper/runs": {
      "post": {
        "operationId": "runs-sync-logiover-huggingface-hub-intelligence-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/logiover~huggingface-hub-intelligence-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-logiover-huggingface-hub-intelligence-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "entityType": {
            "title": "Entity Type",
            "enum": [
              "models",
              "datasets",
              "spaces",
              "papers",
              "collections"
            ],
            "type": "string",
            "description": "Which entity to enumerate from the Hugging Face Hub. 'models' = ML models (~1M+), 'datasets' = training/evaluation datasets (~200k+), 'spaces' = hosted demo apps, 'papers' = daily curated research papers, 'collections' = curated lists of models/datasets/spaces.",
            "default": "models"
          },
          "search": {
            "title": "Search Query",
            "type": "string",
            "description": "Free-text search over name + description. Examples: 'llama', 'whisper', 'mistral', 'phi', 'instruct'. Leave empty to enumerate all entities matching filters.",
            "default": ""
          },
          "author": {
            "title": "Author / Organization Filter",
            "type": "string",
            "description": "Restrict to a single author or organization (e.g. 'mistralai', 'meta-llama', 'openai-community', 'stabilityai'). Substring match.",
            "default": ""
          },
          "pipelineTag": {
            "title": "Task / Pipeline Tag (models only)",
            "type": "string",
            "description": "Filter models by primary task. Common values: 'text-generation', 'text-classification', 'token-classification', 'feature-extraction', 'sentence-similarity', 'fill-mask', 'translation', 'summarization', 'question-answering', 'image-to-text', 'text-to-image', 'image-classification', 'object-detection', 'audio-classification', 'automatic-speech-recognition' (ASR), 'text-to-speech' (TTS), 'reinforcement-learning'.",
            "default": ""
          },
          "library": {
            "title": "Library Filter (models / datasets)",
            "type": "string",
            "description": "Filter by library. Common: 'transformers', 'diffusers', 'sentence-transformers', 'gguf', 'mlx', 'onnx', 'safetensors', 'pytorch', 'jax', 'tensorflow', 'datasets'.",
            "default": ""
          },
          "language": {
            "title": "Language Tag (models / datasets)",
            "type": "string",
            "description": "Filter by primary language. ISO 639-1 codes ('en', 'fr', 'de', 'tr', 'zh', 'ja', 'es') or special: 'multilingual'.",
            "default": ""
          },
          "tags": {
            "title": "Additional Tags Filter",
            "type": "array",
            "description": "Restrict to items whose tag list contains all of these. Examples: ['safetensors','region:us'], ['license:apache-2.0','llama'], ['gated:false']. Hugging Face supports complex tag filters at the API level.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "sort": {
            "title": "Sort By",
            "enum": [
              "downloads",
              "likes",
              "lastModified",
              "createdAt",
              "trendingScore"
            ],
            "type": "string",
            "description": "Field to sort by. 'downloads' = most downloaded, 'likes' = community favorites, 'lastModified' = recently updated, 'createdAt' = newest creations, 'trendingScore' = HF's trending algorithm.",
            "default": "downloads"
          },
          "sortDirection": {
            "title": "Sort Direction",
            "enum": [
              "-1",
              "1"
            ],
            "type": "string",
            "description": "Sort direction: '-1' = descending (highest first), '1' = ascending.",
            "default": "-1"
          },
          "maxResults": {
            "title": "Maximum Results",
            "minimum": 1,
            "maximum": 1000000,
            "type": "integer",
            "description": "Hard cap on records returned. Set to 0 for unlimited (auto-paginates). Be aware: the full models catalog is ~1M+ items.",
            "default": 500
          },
          "fetchDetails": {
            "title": "Fetch Full Details Per Item",
            "type": "boolean",
            "description": "When enabled, for every model / dataset / space / paper, the actor makes an additional call to `/api/{type}/{id}` to fetch richer fields: full README content, sibling file list, model card data, dataset card data, config files, gated status, license, citation. Adds 1 HTTP request per item.",
            "default": false
          },
          "fetchReadme": {
            "title": "Fetch README Content",
            "type": "boolean",
            "description": "When enabled (requires 'Fetch Full Details'), the actor also pulls the raw model/dataset card (README.md) for each item. Useful for AI training datasets, model documentation indexing, and RAG over the Hub.",
            "default": false
          },
          "minDownloads": {
            "title": "Minimum Downloads (client-side filter)",
            "minimum": 0,
            "type": "integer",
            "description": "Drop entities with fewer than this many downloads. Useful for filtering out abandoned or experimental items.",
            "default": 0
          },
          "minLikes": {
            "title": "Minimum Likes (client-side filter)",
            "minimum": 0,
            "type": "integer",
            "description": "Drop entities with fewer than this many likes.",
            "default": 0
          },
          "modifiedFrom": {
            "title": "Modified From (ISO date)",
            "type": "string",
            "description": "Drop items last-modified before this date (YYYY-MM-DD). Useful for tracking what's new this week / month.",
            "default": null
          },
          "papersStartDate": {
            "title": "Daily Papers — Start Date",
            "type": "string",
            "description": "For entityType='papers' only. Pull papers from the daily-papers archive starting at this date (YYYY-MM-DD). Defaults to last 30 days.",
            "default": null
          },
          "papersEndDate": {
            "title": "Daily Papers — End Date",
            "type": "string",
            "description": "For entityType='papers' only. Pull papers up to this date (YYYY-MM-DD). Defaults to today.",
            "default": null
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}