{
  "openapi": "3.0.1",
  "info": {
    "title": "Semantic Scholar Scraper",
    "description": "[💰 $6 / 1K] Extract academic papers, abstracts, citations, references, authors, and open-access PDF links from Semantic Scholar's 200M+ database. Search by keyword, paper ID/DOI/URL, or author. Filter by year, field, and citations. No API key.",
    "version": "1.0",
    "x-build-id": "zGQzgkvOuoM9F66Uk"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/solidcode~semanticscholar-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-solidcode-semanticscholar-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/solidcode~semanticscholar-scraper/runs": {
      "post": {
        "operationId": "runs-sync-solidcode-semanticscholar-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/solidcode~semanticscholar-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-solidcode-semanticscholar-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT record from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "searchQueries": {
            "title": "Search Queries",
            "type": "array",
            "description": "Keywords to search across paper titles and abstracts (e.g., 'large language models' or 'CRISPR gene editing'). Each query produces its own set of paper results. Leave empty if you only want to fetch specific papers or authors.",
            "items": {
              "type": "string"
            }
          },
          "paperIds": {
            "title": "Paper IDs, DOIs, or URLs",
            "type": "array",
            "description": "Fetch specific papers directly. Accepts Semantic Scholar paper IDs (40-char hex), DOIs (e.g., '10.1038/nature14539'), arXiv IDs (e.g., 'arXiv:1706.03762'), or full Semantic Scholar paper URLs. One record is returned per paper.",
            "items": {
              "type": "string"
            }
          },
          "authorIds": {
            "title": "Author IDs or Profile URLs",
            "type": "array",
            "description": "Semantic Scholar author IDs (numeric, e.g., '1741101') or full author profile URLs. Returns an author profile record (name, affiliation, h-index, citation count). Enable 'Include Author Papers' (includeAuthorPapers) to also pull each author's publications.",
            "items": {
              "type": "string"
            }
          },
          "maxResults": {
            "title": "Maximum Results per Query",
            "minimum": 0,
            "maximum": 10000,
            "type": "integer",
            "description": "Maximum number of papers to return per search query. This is an exact cap — you are charged for at most this many results per query. Set to 0 for all available results (capped at 10,000 per query).",
            "default": 100
          },
          "yearFrom": {
            "title": "Year From",
            "minimum": 1900,
            "maximum": 2100,
            "type": "integer",
            "description": "Only include papers published in this year or later. Leave empty for no lower bound."
          },
          "yearTo": {
            "title": "Year To",
            "minimum": 1900,
            "maximum": 2100,
            "type": "integer",
            "description": "Only include papers published in this year or earlier. Leave empty for no upper bound."
          },
          "fieldsOfStudy": {
            "title": "Fields of Study",
            "uniqueItems": true,
            "type": "array",
            "description": "Restrict results to one or more research fields. Leave empty to include all fields.",
            "items": {
              "type": "string",
              "enum": [
                "Computer Science",
                "Medicine",
                "Chemistry",
                "Biology",
                "Materials Science",
                "Physics",
                "Geology",
                "Psychology",
                "Art",
                "History",
                "Geography",
                "Sociology",
                "Business",
                "Political Science",
                "Economics",
                "Philosophy",
                "Mathematics",
                "Engineering",
                "Environmental Science",
                "Agricultural and Food Sciences",
                "Education",
                "Law",
                "Linguistics"
              ],
              "enumTitles": [
                "Computer Science",
                "Medicine",
                "Chemistry",
                "Biology",
                "Materials Science",
                "Physics",
                "Geology",
                "Psychology",
                "Art",
                "History",
                "Geography",
                "Sociology",
                "Business",
                "Political Science",
                "Economics",
                "Philosophy",
                "Mathematics",
                "Engineering",
                "Environmental Science",
                "Agricultural and Food Sciences",
                "Education",
                "Law",
                "Linguistics"
              ]
            }
          },
          "publicationTypes": {
            "title": "Publication Types",
            "uniqueItems": true,
            "type": "array",
            "description": "Restrict results to one or more publication types (e.g., only peer-reviewed journal articles). Leave empty to include all types.",
            "items": {
              "type": "string",
              "enum": [
                "Review",
                "JournalArticle",
                "CaseReport",
                "ClinicalTrial",
                "Conference",
                "Dataset",
                "Editorial",
                "LettersAndComments",
                "MetaAnalysis",
                "News",
                "Study",
                "Book"
              ],
              "enumTitles": [
                "Review",
                "Journal Article",
                "Case Report",
                "Clinical Trial",
                "Conference",
                "Dataset",
                "Editorial",
                "Letters & Comments",
                "Meta-Analysis",
                "News",
                "Study",
                "Book"
              ]
            }
          },
          "openAccessOnly": {
            "title": "Open Access PDFs Only",
            "type": "boolean",
            "description": "Only return papers that have a free, downloadable open-access PDF.",
            "default": false
          },
          "minCitationCount": {
            "title": "Minimum Citation Count",
            "minimum": 0,
            "type": "integer",
            "description": "Only return papers cited at least this many times. Leave empty for no minimum. Useful for surfacing high-impact work."
          },
          "sortBy": {
            "title": "Sort By",
            "enum": [
              "relevance",
              "citationCount",
              "publicationDate"
            ],
            "type": "string",
            "description": "How to order paper search results. 'relevance' returns Semantic Scholar's default result order (it is not a true relevance ranking — bulk search has no relevance score), 'citationCount' sorts by citation count (most cited first), and 'publicationDate' sorts by publication date (most recent first).",
            "default": "relevance"
          },
          "includeAbstracts": {
            "title": "Include Abstracts",
            "type": "boolean",
            "description": "Include the abstract text for each paper. Disable to reduce dataset size.",
            "default": true
          },
          "includeReferences": {
            "title": "Include References",
            "type": "boolean",
            "description": "For each paper, also output the papers it cites (its reference list) as separate records. WARNING: a single paper can have hundreds of references — this can multiply your total result count and cost.",
            "default": false
          },
          "includeCitations": {
            "title": "Include Citing Papers",
            "type": "boolean",
            "description": "For each paper, also output the papers that cite it as separate records. WARNING: highly-cited papers can have tens of thousands of citing papers — this can dramatically multiply your total result count and cost. Use 'Max Citing / Referenced Papers per Paper' (maxCitationsPerPaper) to bound it.",
            "default": false
          },
          "maxCitationsPerPaper": {
            "title": "Max Citing / Referenced Papers per Paper",
            "minimum": 1,
            "maximum": 1000,
            "type": "integer",
            "description": "When 'Include Citing Papers' or 'Include References' is on, this caps how many child papers are fetched per source paper. Default 50; higher values linearly increase runtime and cost.",
            "default": 50
          },
          "includeAuthorPapers": {
            "title": "Include Author Papers",
            "type": "boolean",
            "description": "When you provide author IDs/URLs, also output each author's publications as separate paper records. WARNING: prolific authors can have thousands of papers — this can multiply your total result count and cost.",
            "default": false
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}