{
  "openapi": "3.0.1",
  "info": {
    "title": "Semantic Scholar Scraper",
    "description": "Scrape Semantic Scholar with 200M+ academic papers and authors with full citation graph. Search, fetch by paper/author ID, get citations / references / recommendations, with abstracts, TLDRs, fields-of-study, open-access PDFs, h-index, affiliations, and more",
    "version": "1.0",
    "x-build-id": "6Ux96rY9ZEia5xQr5"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/crawlerbros~semanticscholar-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-crawlerbros-semanticscholar-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/crawlerbros~semanticscholar-scraper/runs": {
      "post": {
        "operationId": "runs-sync-crawlerbros-semanticscholar-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/crawlerbros~semanticscholar-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-crawlerbros-semanticscholar-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "mode"
        ],
        "properties": {
          "mode": {
            "title": "Mode",
            "enum": [
              "searchPaper",
              "searchPaperBulk",
              "byPaper",
              "byPaperCitations",
              "byPaperReferences",
              "searchAuthor",
              "byAuthor",
              "byAuthorPapers",
              "recommendations",
              "byUrl"
            ],
            "type": "string",
            "description": "What to fetch.",
            "default": "byPaper"
          },
          "searchQuery": {
            "title": "Search query",
            "type": "string",
            "description": "Free-text query for searchPaper / searchPaperBulk / searchAuthor modes. Examples: `transformer attention`, `geoffrey hinton`, `crispr cas9`.",
            "default": "transformer attention"
          },
          "year": {
            "title": "Year filter (search modes)",
            "type": "string",
            "description": "Filter papers by publication year. Accepts a single year (`2023`), an open range (`2018-`, `-2010`), or a closed range (`2015-2020`)."
          },
          "fieldsOfStudy": {
            "title": "Fields of study (search modes)",
            "uniqueItems": true,
            "type": "array",
            "description": "Filter to one or more Semantic Scholar fields-of-study. Multi-select.",
            "items": {
              "type": "string",
              "enum": [
                "Computer Science",
                "Medicine",
                "Chemistry",
                "Biology",
                "Materials Science",
                "Physics",
                "Geology",
                "Psychology",
                "Art",
                "History",
                "Geography",
                "Sociology",
                "Business",
                "Political Science",
                "Economics",
                "Philosophy",
                "Mathematics",
                "Engineering",
                "Environmental Science",
                "Agricultural and Food Sciences",
                "Education",
                "Law",
                "Linguistics"
              ],
              "enumTitles": [
                "Computer Science",
                "Medicine",
                "Chemistry",
                "Biology",
                "Materials Science",
                "Physics",
                "Geology",
                "Psychology",
                "Art",
                "History",
                "Geography",
                "Sociology",
                "Business",
                "Political Science",
                "Economics",
                "Philosophy",
                "Mathematics",
                "Engineering",
                "Environmental Science",
                "Agricultural and Food Sciences",
                "Education",
                "Law",
                "Linguistics"
              ]
            },
            "default": []
          },
          "publicationTypes": {
            "title": "Publication types (search modes)",
            "uniqueItems": true,
            "type": "array",
            "description": "Filter by publication type. Multi-select.",
            "items": {
              "type": "string",
              "enum": [
                "Review",
                "JournalArticle",
                "CaseReport",
                "ClinicalTrial",
                "Conference",
                "Dataset",
                "Editorial",
                "LettersAndComments",
                "MetaAnalysis",
                "News",
                "Study",
                "Book",
                "BookSection"
              ],
              "enumTitles": [
                "Review",
                "Journal Article",
                "Case Report",
                "Clinical Trial",
                "Conference",
                "Dataset",
                "Editorial",
                "Letters / Comments",
                "Meta-Analysis",
                "News",
                "Study",
                "Book",
                "Book Section"
              ]
            },
            "default": []
          },
          "venues": {
            "title": "Venues (search modes)",
            "type": "array",
            "description": "Filter by publication venue (e.g. `Nature`, `NeurIPS`, `IEEE Transactions on Pattern Analysis`).",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "openAccessOnly": {
            "title": "Open-access only (search modes)",
            "type": "boolean",
            "description": "Drop papers without an open-access PDF.",
            "default": false
          },
          "minCitationCount": {
            "title": "Min citation count (search modes)",
            "minimum": 0,
            "maximum": 10000000,
            "type": "integer",
            "description": "Drop papers with fewer than this many citations."
          },
          "sort": {
            "title": "Sort (searchPaperBulk only)",
            "enum": [
              "",
              "relevance",
              "citationCount:desc",
              "citationCount:asc",
              "publicationDate:desc",
              "publicationDate:asc"
            ],
            "type": "string",
            "description": "Sort order for bulk search. Relevance-ranked search uses an internal score and ignores this field.",
            "default": ""
          },
          "paperIds": {
            "title": "Paper IDs (byPaper / byPaperCitations / byPaperReferences / recommendations)",
            "type": "array",
            "description": "Semantic Scholar paper IDs (40-char hex), or prefixed external IDs: `DOI:10.1145/...`, `ARXIV:1706.03762`, `MAG:...`, `PMID:...`, `PMCID:PMC...`, `ACL:...`. Bare DOIs and arXiv IDs are auto-prefixed.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "authorIds": {
            "title": "Author IDs (byAuthor / byAuthorPapers)",
            "type": "array",
            "description": "Numeric Semantic Scholar author IDs (e.g. `1741101`).",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "urls": {
            "title": "URLs (byUrl mode)",
            "type": "array",
            "description": "Semantic Scholar / DOI / arXiv URLs. Examples: `https://www.semanticscholar.org/paper/<sha>`, `https://arxiv.org/abs/1706.03762`, `https://doi.org/10.1145/...`, `https://www.semanticscholar.org/author/1741101`.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "includeCitationsOnPaper": {
            "title": "Include citations on paper records (byPaper)",
            "type": "boolean",
            "description": "Embed up to 200 citing-paper summaries inside each paper record (byPaper mode only). Increases payload size.",
            "default": false
          },
          "includeReferencesOnPaper": {
            "title": "Include references on paper records (byPaper)",
            "type": "boolean",
            "description": "Embed up to 200 cited-paper summaries inside each paper record (byPaper mode only).",
            "default": false
          },
          "paperFields": {
            "title": "Custom paper fields (advanced)",
            "type": "array",
            "description": "Override the default `fields=` list sent to the API for paper endpoints. Leave empty to use the curated default. See https://api.semanticscholar.org/api-docs/graph for the full list.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "authorFields": {
            "title": "Custom author fields (advanced)",
            "type": "array",
            "description": "Override the default `fields=` list sent to the API for author endpoints.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "semanticScholarApiKey": {
            "title": "Semantic Scholar API key (optional)",
            "type": "string",
            "description": "Raises rate limits 10x. Free signup: https://www.semanticscholar.org/product/api#api-key-form. The actor works without a key."
          },
          "requestDelaySeconds": {
            "title": "Request delay (seconds)",
            "minimum": 0,
            "maximum": 30,
            "type": "integer",
            "description": "Delay between API calls. Defaults to 0 (with API key) or 2 (without). Honour the source's rate-limit budget."
          },
          "maxItems": {
            "title": "Max items",
            "minimum": 1,
            "maximum": 10000,
            "type": "integer",
            "description": "Hard cap on emitted records.",
            "default": 50
          },
          "useProxy": {
            "title": "Use Apify proxy",
            "type": "boolean",
            "description": "Force routing through Apify proxy from the first request. Recommended when running without an API key from datacenter IPs (Apify cloud is rate-limited heavily by Semantic Scholar's free tier).",
            "default": false
          },
          "autoEscalateOnBlock": {
            "title": "Auto-escalate to proxy on rate-limit",
            "type": "boolean",
            "description": "If the API responds 429 from direct IP, automatically rotate through Apify proxy sessions for the rest of the run. Helps avoid getting stuck on the unauthenticated 100-req/5-min cap from a single datacenter IP.",
            "default": true
          },
          "proxyConfiguration": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Apify proxy configuration. Used only when useProxy is enabled. Auto-escalation falls back to the default Apify proxy group automatically.",
            "default": {
              "useApifyProxy": true,
              "apifyProxyGroups": []
            }
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}