{
  "openapi": "3.0.1",
  "info": {
    "title": "Kaggle Datasets Scraper",
    "description": "Extract Kaggle dataset metadata at scale: titles, owners, descriptions, tags, license, file types, sizes, downloads, views, and votes. Filter by search, tag, user, file type, or size.",
    "version": "0.2",
    "x-build-id": "VT1llegeKRXemnZx2"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/parseforge~kaggle-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-parseforge-kaggle-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/parseforge~kaggle-scraper/runs": {
      "post": {
        "operationId": "runs-sync-parseforge-kaggle-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/parseforge~kaggle-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-parseforge-kaggle-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT from the Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "search": {
            "title": "Search keyword",
            "type": "string",
            "description": "Free-text query against dataset titles, descriptions, and tags. Leave blank to browse without a keyword (use sortBy / fileType to control discovery)."
          },
          "maxItems": {
            "title": "Max Items",
            "minimum": 1,
            "maximum": 1000000,
            "type": "integer",
            "description": "Maximum number of items to return. Free users: limited to 10 items (preview). Paid users: optional, up to 1,000,000."
          },
          "sortBy": {
            "title": "Sort by",
            "enum": [
              "hottest",
              "votes",
              "updated",
              "active",
              "published"
            ],
            "type": "string",
            "description": "Order results by Kaggle built-in sort orders.",
            "default": "hottest"
          },
          "fileType": {
            "title": "File type",
            "enum": [
              "all",
              "csv",
              "json",
              "sqlite",
              "bigQuery"
            ],
            "type": "string",
            "description": "Restrict results to datasets containing the chosen file format. 'all' returns every format.",
            "default": "all"
          },
          "license": {
            "title": "License",
            "enum": [
              "all",
              "cc",
              "gpl",
              "odb",
              "other"
            ],
            "type": "string",
            "description": "Restrict results to datasets shared under the chosen license. 'all' returns every license.",
            "default": "all"
          },
          "tag": {
            "title": "Tag",
            "enum": [
              "1x1 convolution",
              "accelerators",
              "advanced",
              "adversarial learning",
              "aesthetic quality",
              "africa",
              "agriculture",
              "alcohol",
              "amharic",
              "animals",
              "anime and manga",
              "antarctica",
              "arabic",
              "art",
              "artificial intelligence",
              "arts and entertainment",
              "asia",
              "assamese",
              "astronomy",
              "atmospheric science",
              "attention dropout",
              "audio",
              "audio classification",
              "audio command detection",
              "audio event classification",
              "audio synthesis",
              "audio-to-audio",
              "australia",
              "auto racing",
              "auto-updating data",
              "automatic speech recognition",
              "automl",
              "automobiles and vehicles",
              "auxiliary classifier",
              "aviation",
              "banking",
              "baseball",
              "basketball",
              "batch normalization",
              "bayesian statistics",
              "beginner",
              "benchmark",
              "benchmark dataset",
              "bengali",
              "bert",
              "bigan",
              "bigbigan",
              "bigquery",
              "binary classification",
              "biology",
              "biotechnology",
              "board games",
              "brazil",
              "business",
              "canada",
              "cancer",
              "card games",
              "catboost",
              "categorical",
              "celebrities",
              "chemistry",
              "chichewa",
              "china",
              "chinese",
              "chinese (taiwan)",
              "cities and urban areas",
              "classification",
              "clothing and accessories",
              "clustering",
              "cnn",
              "coding",
              "comics and animation",
              "computer science",
              "computer vision",
              "convnext",
              "convolution",
              "cooking and recipes",
              "corrplot",
              "covid19",
              "cricket",
              "crime",
              "crowdfunding",
              "culture and humanities",
              "currencies and foreign exchange",
              "cv2",
              "cyber security",
              "cycling",
              "dailychallenge",
              "dance",
              "data analytics",
              "data cleaning",
              "data storytelling",
              "data type",
              "data visualization",
              "datetime",
              "decision tree",
              "deep learning",
              "deit",
              "demographics",
              "denoising",
              "densenet",
              "dentistry",
              "deserts",
              "diabetes",
              "dimensionality reduction",
              "diseases",
              "dnn",
              "doParallel",
              "dplyr",
              "dropout",
              "drugs and medications",
              "dutch",
              "e-commerce services",
              "ears and hearing",
              "earth and nature",
              "earth science",
              "economics",
              "education",
              "efficientnet",
              "efficientnet-b7",
              "efficientnetv2",
              "electricity",
              "electronics",
              "email and messaging",
              "employment",
              "energy",
              "engineering",
              "english",
              "ensembling",
              "environment",
              "europe",
              "evaluation",
              "exercise",
              "exploratory data analysis",
              "eyes and vision",
              "feature engineering",
              "feature extraction",
              "finance",
              "finnish",
              "fish and aquaria",
              "food",
              "football",
              "forcats",
              "forestry",
              "french",
              "gambling",
              "games",
              "gan",
              "gender",
              "general knowledge and reasoning",
              "genetics",
              "geography",
              "geography and places",
              "geology",
              "geospatial analysis",
              "german",
              "ggplot2",
              "global",
              "golf",
              "government",
              "gpt2",
              "gpu",
              "gradient boosting",
              "graph",
              "graph neural network",
              "greenland",
              "gymnastics",
              "health",
              "health and fitness",
              "health conditions",
              "healthcare",
              "heart conditions",
              "hindi",
              "history",
              "hockey",
              "holidays and cultural events",
              "hospitals and treatment centers",
              "hotels and accommodations",
              "housing",
              "hugging face",
              "human rights",
              "image",
              "image augmentation",
              "image classification",
              "image classification logits",
              "image generator",
              "image segmentation",
              "image style transfer",
              "image super resolution",
              "image text detection",
              "image text recognition",
              "image-to-image",
              "image-to-text",
              "income",
              "india",
              "indonesian",
              "insurance",
              "intermediate",
              "international relations",
              "internet",
              "investing",
              "IPython",
              "italian",
              "japan",
              "japanese",
              "jobs and career",
              "json",
              "k-means",
              "keras",
              "knn",
              "korea",
              "korean",
              "language",
              "languages",
              "law",
              "learn",
              "lending",
              "lightgbm",
              "linear regression",
              "linguistics",
              "literature",
              "logistic regression",
              "lstm",
              "lubridate",
              "make-up and cosmetics",
              "manufacturing",
              "marketing",
              "martial arts",
              "mask r-cnn",
              "math",
              "mathematics",
              "matplotlib",
              "medicine",
              "mental health",
              "mexico",
              "middle east",
              "military",
              "ml ethics",
              "mobile and wireless",
              "mobilenet v2",
              "mobilenetv3",
              "model comparison",
              "model explainability",
              "mortality",
              "mountains",
              "movies and tv shows",
              "multi-head attention",
              "multiclass classification",
              "multilabel classification",
              "multilingual",
              "multimodal",
              "museums",
              "music",
              "naive bayes",
              "natural disasters",
              "neural networks",
              "neuroscience",
              "news",
              "nlp",
              "nltk",
              "north america",
              "numpy",
              "nutrition",
              "object detection",
              "oceania",
              "oil and gas",
              "online communities",
              "optimization",
              "other",
              "outlier analysis",
              "pandas",
              "pca",
              "people",
              "people and society",
              "persian",
              "philosophy",
              "physical science",
              "physics",
              "PIL",
              "pitch extraction",
              "plants",
              "plotly",
              "polish",
              "politics",
              "pollution",
              "popular culture",
              "portuguese",
              "pose detection",
              "pre-trained model",
              "primary and secondary schools",
              "programming",
              "psychology",
              "public health",
              "public safety",
              "puzzles",
              "python",
              "pytorch",
              "question answering",
              "r",
              "racial equity",
              "rail transport",
              "random forest",
              "randomForest",
              "ratings and reviews",
              "re",
              "real estate",
              "recommender systems",
              "regression",
              "reinforcement learning",
              "religion and belief systems",
              "renewable energy",
              "research",
              "residual block",
              "resnet",
              "restaurants",
              "retail and shopping",
              "retinanet",
              "retrieval question answering",
              "retrieval/ranking",
              "rnn",
              "roberta",
              "robotics",
              "russia",
              "russian",
              "sam",
              "sampling",
              "science and technology",
              "scipy",
              "seaborn",
              "search engines",
              "segmentation",
              "sentence similarity",
              "signal processing",
              "simulations",
              "sklearn",
              "slovenian",
              "social issues and advocacy",
              "social networks",
              "social science",
              "socrata",
              "software",
              "south america",
              "spaCy",
              "spanish",
              "speech synthesis",
              "speech-to-text",
              "sports",
              "sql",
              "standardized testing",
              "statistical analysis",
              "summarization",
              "sundanese",
              "survey analysis",
              "svm",
              "swedish",
              "synthetic",
              "t5",
              "tabular",
              "tabular classification",
              "tamil",
              "tennis",
              "tensorflow",
              "text",
              "text classification",
              "text conversation",
              "text fill-mask",
              "text generation",
              "text mining",
              "text pre-processing",
              "text segmentation",
              "text sequence alignment",
              "text-to-image",
              "text-to-speech",
              "text-to-text generation",
              "thai",
              "tibble",
              "tidyverse",
              "time series analysis",
              "token classification",
              "torchvision",
              "tpu",
              "transfer learning",
              "transformer",
              "transformers",
              "translation",
              "transportation",
              "travel",
              "turkish",
              "twi",
              "Two Sigma x Kaggle Finance Data Repo",
              "ukrainian",
              "unet",
              "united states",
              "universities and colleges",
              "urban planning",
              "urdu",
              "uzbek",
              "vae",
              "vgg-style",
              "video",
              "video classification",
              "video games",
              "video generation",
              "vietnamese",
              "vision transformer",
              "water bodies",
              "water sports",
              "water transport",
              "weather and climate",
              "websites",
              "whisper",
              "word2vec skip-gram",
              "xgboost",
              "yolo",
              "yolov5",
              "yolov8",
              "zero-shot text classification"
            ],
            "type": "string",
            "description": "Filter by a Kaggle tag slug. Leave blank for no tag filter. The list is harvested from the Kaggle public dataset API and covers the standard subject, technique, task, and topic taxonomy."
          },
          "user": {
            "title": "Kaggle user",
            "type": "string",
            "description": "Filter to datasets owned by a single Kaggle user or organisation slug (the part after kaggle.com/, e.g. timoboz, mlg-ulb, organizations/google). Leave blank for no user filter."
          },
          "minSize": {
            "title": "Min size (bytes)",
            "minimum": 0,
            "type": "integer",
            "description": "Lower bound on the dataset's total uncompressed size in bytes."
          },
          "maxSize": {
            "title": "Max size (bytes)",
            "minimum": 0,
            "type": "integer",
            "description": "Upper bound on the dataset's total uncompressed size in bytes."
          },
          "includeDescription": {
            "title": "Include full description",
            "type": "boolean",
            "description": "Fetch the dataset detail endpoint for each record to populate description, tags, and versions. When false, those fields stay null and the run is faster.",
            "default": true
          },
          "proxyConfiguration": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Optional Apify Proxy configuration. The Kaggle API tolerates direct calls but a residential or datacenter pool is recommended for large jobs."
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}