{
  "openapi": "3.0.1",
  "info": {
    "title": "PubMed Biomedical Paper Scraper",
    "description": "Scrapes PubMed biomedical papers using the official NCBI Entrez API. Extracts full metadata including abstracts, MeSH terms, authors with affiliations, citations, grants, and more. Includes smart analytics for author networks, topic trends, and geographic distribution.",
    "version": "1.0",
    "x-build-id": "aGx4YmP0SOG4QXYmX"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/brilliant_gum~pubmed-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-brilliant_gum-pubmed-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/brilliant_gum~pubmed-scraper/runs": {
      "post": {
        "operationId": "runs-sync-brilliant_gum-pubmed-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/brilliant_gum~pubmed-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-brilliant_gum-pubmed-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "queries"
        ],
        "properties": {
          "queries": {
            "title": "Search Queries",
            "minItems": 1,
            "type": "array",
            "description": "Array of PubMed search terms. Supports full PubMed query syntax (e.g., 'cancer immunotherapy', 'COVID-19[MeSH] AND vaccine[tiab]')",
            "items": {
              "type": "string"
            }
          },
          "ncbiApiKey": {
            "title": "NCBI API Key",
            "type": "string",
            "description": "Optional NCBI API key. Without key: 3 requests/sec limit. With key: 10 requests/sec limit. Get yours at: https://www.ncbi.nlm.nih.gov/account/"
          },
          "database": {
            "title": "NCBI Database",
            "enum": [
              "pubmed",
              "pmc",
              "gene",
              "protein"
            ],
            "type": "string",
            "description": "Which NCBI database to search",
            "default": "pubmed"
          },
          "maxResults": {
            "title": "Maximum Results",
            "minimum": 1,
            "maximum": 10000,
            "type": "integer",
            "description": "Maximum number of papers to retrieve per query",
            "default": 100
          },
          "dateFrom": {
            "title": "Date From",
            "pattern": "^\\d{4}/\\d{2}/\\d{2}$",
            "type": "string",
            "description": "Filter papers published from this date (format: YYYY/MM/DD)"
          },
          "dateTo": {
            "title": "Date To",
            "pattern": "^\\d{4}/\\d{2}/\\d{2}$",
            "type": "string",
            "description": "Filter papers published up to this date (format: YYYY/MM/DD)"
          },
          "sortBy": {
            "title": "Sort Order",
            "enum": [
              "relevance",
              "pub_date",
              "cited_by_count"
            ],
            "type": "string",
            "description": "How to sort search results",
            "default": "relevance"
          },
          "pubTypes": {
            "title": "Publication Types Filter",
            "type": "array",
            "description": "Filter by publication type. Leave empty to include all types.",
            "items": {
              "type": "string"
            }
          },
          "species": {
            "title": "Species Filter",
            "enum": [
              "all",
              "human",
              "animal"
            ],
            "type": "string",
            "description": "Filter by study species",
            "default": "all"
          },
          "language": {
            "title": "Language Filter",
            "type": "array",
            "description": "Filter by publication language (ISO 639-1 codes, e.g. ['eng', 'fre']). Leave empty for all languages.",
            "items": {
              "type": "string"
            }
          },
          "includeAbstract": {
            "title": "Include Abstract",
            "type": "boolean",
            "description": "Include the full abstract text in results (structured if available)",
            "default": true
          },
          "includeMeshTerms": {
            "title": "Include MeSH Terms",
            "type": "boolean",
            "description": "Include MeSH (Medical Subject Headings) terms with major topic indicators",
            "default": true
          },
          "includeFullTextLinks": {
            "title": "Include Full Text Links",
            "type": "boolean",
            "description": "Include PMC full text URL when available (open access articles)",
            "default": true
          },
          "includeCitations": {
            "title": "Include Citations",
            "type": "boolean",
            "description": "Include approximate cited-by count and first 10 reference PMIDs (uses additional API calls)",
            "default": false
          },
          "includeAnalytics": {
            "title": "Include Smart Analytics",
            "type": "boolean",
            "description": "Generate analytics summary: author networks, topic trends, journal distribution, funding analysis, geographic distribution",
            "default": true
          },
          "batchSize": {
            "title": "Batch Size",
            "minimum": 10,
            "maximum": 200,
            "type": "integer",
            "description": "Number of records to fetch per API call (max 200 for efetch). Higher values are more efficient.",
            "default": 200
          },
          "requestDelayMs": {
            "title": "Request Delay (ms)",
            "minimum": 50,
            "type": "integer",
            "description": "Delay between API requests in milliseconds. Auto-configured based on API key presence (333ms without key, 100ms with key). Override here if needed."
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}