{
  "openapi": "3.0.1",
  "info": {
    "title": "Semantic Scholar Paper Search",
    "description": "Search and extract academic research papers from Semantic Scholar's database of over 200 million publications.",
    "version": "2.0",
    "x-build-id": "iEgMJjST3rZfofbe9"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/ryanclinton~semantic-scholar-search/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-ryanclinton-semantic-scholar-search",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/ryanclinton~semantic-scholar-search/runs": {
      "post": {
        "operationId": "runs-sync-ryanclinton-semantic-scholar-search",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/ryanclinton~semantic-scholar-search/run-sync": {
      "post": {
        "operationId": "run-sync-ryanclinton-semantic-scholar-search",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT record from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "mode": {
            "title": "Mode",
            "enum": [
              "auto",
              "search",
              "literature-review",
              "find-foundational",
              "emerging-trends",
              "compare-topics",
              "deep-analysis",
              "one-answer",
              "similar-to-papers",
              "author-papers",
              "batch-lookup",
              "snippet-search",
              "citation-graph"
            ],
            "type": "string",
            "description": "What job you want done. `auto` (default) picks the best mode from your input shape. The other options name the job directly. **Workflow modes** (search-style + opinionated output): `search` keyword search, `literature-review` topic clusters + timeline + ranked picks, `find-foundational` old + still-influential papers (yearTo=now−8, sort by influentialCitationCount), `emerging-trends` recent + fast-rising papers (yearFrom=now−2, sort by velocity), `compare-topics` runs a separate search for each entry in `queries` and emits a single `topic-comparison` record. **Graph modes**: `similar-to-papers` runs the Semantic Scholar recommendation engine, `citation-graph` walks citations + references from a seed. **Lookup modes**: `author-papers` pulls every paper an author published, `batch-lookup` resolves up to 500 papers per call by DOI / arXiv ID / PubMed ID, `snippet-search` returns matching passages from open-access PDFs.",
            "default": "auto"
          },
          "collection": {
            "title": "Collection (preset)",
            "enum": [
              "top-ml-papers",
              "recent-ai-breakthroughs",
              "foundational-deep-learning",
              "highly-cited-biology",
              "oncology-recent",
              "climate-and-energy",
              "large-language-models",
              "reinforcement-learning"
            ],
            "type": "string",
            "description": "Optional. Apply a curated query + filter pack so you don't have to configure each field manually. Available: `top-ml-papers` (last 5y, ≥200 cites), `recent-ai-breakthroughs` (last 18mo, high influential), `foundational-deep-learning` (2010-2018 canon), `highly-cited-biology`, `oncology-recent`, `climate-and-energy`, `large-language-models`, `reinforcement-learning`. Your manually-set fields always win — collection only fills gaps."
          },
          "query": {
            "title": "Search Query",
            "type": "string",
            "description": "Used by `search` and (as a fallback) `snippet-search` modes. Search by paper title and abstract (e.g. 'large language models', 'protein folding').",
            "default": "large language models"
          },
          "yearFrom": {
            "title": "From Year",
            "type": "integer",
            "description": "Earliest publication year. Search-mode only.",
            "default": 2023
          },
          "yearTo": {
            "title": "To Year",
            "type": "integer",
            "description": "Latest publication year. Search-mode only."
          },
          "venue": {
            "title": "Venue",
            "type": "string",
            "description": "Filter by journal or conference (e.g. 'Nature', 'NeurIPS', 'ICML', 'ArXiv'). Search-mode only."
          },
          "fieldsOfStudy": {
            "title": "Field of Study",
            "enum": [
              "Computer Science",
              "Medicine",
              "Biology",
              "Physics",
              "Chemistry",
              "Mathematics",
              "Engineering",
              "Economics",
              "Psychology",
              "Sociology"
            ],
            "type": "string",
            "description": "Filter by academic field. Search-mode only."
          },
          "openAccessOnly": {
            "title": "Open Access Only",
            "type": "boolean",
            "description": "Only return papers with free PDFs available. Search-mode only.",
            "default": false
          },
          "minCitations": {
            "title": "Min Citations",
            "type": "integer",
            "description": "Minimum citation count. Search-mode only."
          },
          "minInfluentialCitations": {
            "title": "Min Influential Citations",
            "type": "integer",
            "description": "Semantic Scholar's ML-derived signal for citations that meaningfully build on a paper, not just casually mention it. Stricter than raw citation count and unique to S2. Search-mode only."
          },
          "sortBy": {
            "title": "Sort By",
            "enum": [
              "relevance",
              "citationCount",
              "influentialCitationCount",
              "publicationDate"
            ],
            "type": "string",
            "description": "How to sort results. `influentialCitationCount` is Semantic Scholar's signature ranking — papers cited *meaningfully*, not just frequently. Search-mode only.",
            "default": "relevance"
          },
          "seedPaperIds": {
            "title": "Seed Paper IDs",
            "maxItems": 50,
            "type": "array",
            "description": "One or more paper IDs to seed the recommendation engine. Accepts Semantic Scholar paperIds, DOIs, ARXIV:..., or PMID:... — the same identifiers Semantic Scholar resolves. Used by `similar-to-papers` mode. With multiple seeds, the actor switches to multi-paper recommendation (positive + negative blending).",
            "items": {
              "type": "string"
            }
          },
          "negativePaperIds": {
            "title": "Negative Paper IDs",
            "maxItems": 50,
            "type": "array",
            "description": "Optional. Papers to push the recommendation engine *away* from — useful when you want results similar to A and B but unlike C. Multi-paper mode only.",
            "items": {
              "type": "string"
            }
          },
          "authorName": {
            "title": "Author Name",
            "type": "string",
            "description": "Author's full name (e.g. 'Yann LeCun', 'Geoffrey Hinton'). The actor resolves this against Semantic Scholar's author index and picks the highest-paperCount match. Used by `author-papers` mode."
          },
          "authorId": {
            "title": "Author ID",
            "type": "string",
            "description": "Optional. Semantic Scholar author ID — bypasses name resolution. Find it by inspecting an `authorIds` value from a previous run."
          },
          "paperIds": {
            "title": "Paper IDs",
            "maxItems": 2000,
            "type": "array",
            "description": "List of identifiers to look up. Up to 500 per Semantic Scholar batch call; the actor chunks longer lists across multiple calls (up to 2,000 total). Accepts: bare DOI, ARXIV:..., PMID:..., or Semantic Scholar paperId. Used by `batch-lookup` mode.",
            "items": {
              "type": "string"
            }
          },
          "snippetQuery": {
            "title": "Snippet Query",
            "type": "string",
            "description": "Text to find inside open-access PDFs. Returns the actual matching passage (~500-char window) with section labels — not just an abstract. Used by `snippet-search` mode. Falls back to `query` if blank."
          },
          "seedPaperId": {
            "title": "Seed Paper ID",
            "type": "string",
            "description": "Single paper ID to walk the citation graph from. Same ID forms as `seedPaperIds`. Used by `citation-graph` mode."
          },
          "direction": {
            "title": "Direction",
            "enum": [
              "citations",
              "references",
              "both"
            ],
            "type": "string",
            "description": "`citations` = papers that cite the seed (forward). `references` = papers the seed cites (backward). `both` splits the budget. Citation-graph-mode only.",
            "default": "both"
          },
          "complexityLevel": {
            "title": "Complexity Level",
            "enum": [
              "low",
              "medium",
              "high"
            ],
            "type": "string",
            "description": "Controls how much output gets pushed to the dataset. `low` — only `one-answer`, `top-picks`, and `summary` records (compact, exec-friendly). `medium` — adds `recommended-actions`, `research-map`, `blindspots`, `search-insights`, `analysis-pack`. `high` (default) — full dataset with every analytical record. Use `low` when piping into Slack / agent tool calls / dashboards. Use `high` when running interactively or feeding a downstream analytics pipeline.",
            "default": "high"
          },
          "continuousMode": {
            "title": "Continuous Mode (deltas only)",
            "type": "boolean",
            "description": "Opt-in for scheduled monitoring runs. When `true`, `monitoringStateKey` is set, and this is not the first run for that key, only papers with `isNew=true` or `citationDelta>=5` are pushed. Analytical records (top-picks, research-map, etc.) are still emitted. Cuts per-run cost on monitoring schedules where most papers haven't changed.",
            "default": false
          },
          "outputFormat": {
            "title": "Output Format",
            "enum": [
              "standard",
              "analysis-pack"
            ],
            "type": "string",
            "description": "`standard` (default) — emit all record types as configured. `analysis-pack` — also emit a single ready-to-use `analysis-pack` record at the end with summary + topFindings + keyPapers + trendNarrative + risks + recommendedActions, suitable for pasting directly into a report or LLM prompt without post-processing. The standard records are still emitted alongside.",
            "default": "standard"
          },
          "userIntent": {
            "title": "User Intent (scoring persona)",
            "enum": [
              "researcher",
              "engineer",
              "investor",
              "student"
            ],
            "type": "string",
            "description": "Optional. Switches the impactScore weighting to match your role. `researcher` (default) — citations 55% + influential 25% + velocity 20%. `engineer` — velocity 45% + citations 30% + influential 25% (recency-heavy). `investor` — velocity 50% + citations 25% + influential 25% (momentum-focused). `student` — citations 50% + influential 35% + velocity 15% (foundational-leaning). The active weights are surfaced in logs and on every paper record."
          },
          "queries": {
            "title": "Queries (compare-topics)",
            "maxItems": 8,
            "type": "array",
            "description": "List of 2–8 query strings to compare side-by-side. Used by `compare-topics` mode. Each query runs a separate search and the actor emits a single `topic-comparison` record showing avg impact, breakout count, foundational count, top paper, and growth-vs-last-run per topic.",
            "items": {
              "type": "string"
            }
          },
          "userProfileKey": {
            "title": "User Profile Key",
            "type": "string",
            "description": "Optional. Set to any string (e.g. `my-research-focus`) to enable lightweight cross-run personalisation. The actor stores the queries you've searched, clusters that have appeared in your runs, and papers that have been returned (FIFO-bounded: 200 queries / 500 clusters / 5,000 papers). Subsequent runs bias the top-picks ranking toward papers in known clusters (up to +20 impact-equivalent boost based on engagement frequency). The summary record carries a `userProfile` aggregate. Distinct from `monitoringStateKey` — monitoring tracks paper deltas; user profile tracks YOUR engagement."
          },
          "monitoringStateKey": {
            "title": "Monitoring State Key",
            "type": "string",
            "description": "Optional. Set to any string (e.g. `weekly-llm-watch`) to turn on cross-run monitoring. The actor stores a snapshot of the papers it returned in a named KV store. On the next run with the same key, every paper record carries `isNew` (true if not seen before), `citationDelta` (citation count change since last run), `previousCitationCount`, and `daysSinceLastSeen`. The summary record carries an aggregate `monitoring` object with new/growing paper counts. On the first run with a new key, every paper has `isNew=null` (no prior state). The snapshot is capped at 5,000 papers (FIFO). Use one key per scheduled job."
          },
          "apiKey": {
            "title": "Semantic Scholar API Key",
            "type": "string",
            "description": "Optional API key for higher rate limits. Request one at https://www.semanticscholar.org/product/api — unauthenticated users share a small global pool and will hit 429 errors faster on large jobs."
          },
          "maxResults": {
            "title": "Max Results",
            "minimum": 1,
            "maximum": 1000,
            "type": "integer",
            "description": "Maximum number of records to return. Caps: search 1000, recommendations 500, author-papers 1000, batch-lookup 500, snippet-search 100, citation-graph 1000.",
            "default": 50
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}