{
  "openapi": "3.0.1",
  "info": {
    "title": "📰 Google News Extractor / Scraper",
    "description": "Extract Google News articles by keyword or topic. No login, API key, or cookies required. Bulk search queries, 50+ regions, full-text extraction for AI/RAG, deduplication, MCP-optimized output schema. Export to JSON/CSV or integrate via API.",
    "version": "0.1",
    "x-build-id": "8yqko3Zb7fWSPymeD"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/khadinakbar~google-news-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-khadinakbar-google-news-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/khadinakbar~google-news-scraper/runs": {
      "post": {
        "operationId": "runs-sync-khadinakbar-google-news-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/khadinakbar~google-news-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-khadinakbar-google-news-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT from the key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "searchQueries": {
            "title": "Search Queries (Keywords)",
            "type": "array",
            "description": "List of search keywords or phrases to look up on Google News. Each query fetches its own RSS feed. Supports Google search operators: use quotes for exact match (\"climate change\"), minus to exclude (-bitcoin), OR for alternatives (AI OR \"machine learning\"), site: to filter by publisher (site:reuters.com). Leave empty if using topics, topicUrls, or startUrls instead.",
            "items": {
              "type": "string"
            }
          },
          "topics": {
            "title": "Built-in Google News Topics",
            "type": "array",
            "description": "Select from Google News built-in topic sections. Each selected topic fetches its own RSS feed of top headlines. Use this for broad category monitoring without keywords. Can be combined with searchQueries.",
            "items": {
              "type": "string",
              "enum": [
                "WORLD",
                "NATION",
                "BUSINESS",
                "TECHNOLOGY",
                "ENTERTAINMENT",
                "SPORTS",
                "SCIENCE",
                "HEALTH"
              ],
              "enumTitles": [
                "🌍 World",
                "🗽 Nation (US)",
                "💼 Business",
                "💻 Technology",
                "🎬 Entertainment",
                "⚽ Sports",
                "🔬 Science",
                "❤️ Health"
              ]
            }
          },
          "topicUrls": {
            "title": "Custom Google News Section URLs",
            "type": "array",
            "description": "Advanced: Paste the URL of any Google News section, topic page, or custom RSS feed directly. Both HTML page URLs (https://news.google.com/topics/...) and RSS URLs (https://news.google.com/rss/topics/...) are accepted — they are automatically converted. Use this for niche topics not covered by built-in topic sections.",
            "items": {
              "type": "string"
            }
          },
          "startUrls": {
            "title": "Direct RSS Feed URLs",
            "type": "array",
            "description": "Advanced: Provide raw Google News RSS feed URLs directly. Use for custom queries already formatted as RSS (e.g. from Google Alerts exports). Each URL must be a valid RSS feed returning XML.",
            "items": {
              "type": "object",
              "required": [
                "url"
              ],
              "properties": {
                "url": {
                  "type": "string",
                  "title": "URL of an RSS feed",
                  "format": "uri"
                }
              }
            }
          },
          "maxResultsPerQuery": {
            "title": "Max Articles Per Query / Topic",
            "minimum": 1,
            "maximum": 100,
            "type": "integer",
            "description": "Maximum number of articles to extract per search query or topic feed. Google News RSS feeds return up to 100 articles per request. Default is 100. For bulk jobs with many queries, set lower (e.g. 10–20) to stay within budget.",
            "default": 100
          },
          "regionLanguage": {
            "title": "Region & Language",
            "enum": [
              "US:en",
              "GB:en",
              "AU:en",
              "CA:en",
              "IN:en",
              "DE:de",
              "AT:de",
              "CH:de",
              "FR:fr",
              "BE:fr",
              "CH:fr",
              "ES:es",
              "MX:es",
              "AR:es",
              "CO:es",
              "IT:it",
              "PT:pt",
              "BR:pt",
              "NL:nl",
              "PL:pl",
              "RU:ru",
              "JP:ja",
              "CN:zh-Hans",
              "TW:zh-Hant",
              "KR:ko",
              "SA:ar",
              "EG:ar",
              "TR:tr",
              "IL:he",
              "SE:sv",
              "NO:no",
              "DK:da",
              "FI:fi",
              "CZ:cs",
              "HU:hu",
              "RO:ro",
              "GR:el",
              "UA:uk",
              "ID:id",
              "TH:th",
              "VN:vi",
              "NG:en",
              "ZA:en",
              "KE:en",
              "GH:en",
              "PK:en",
              "BD:en",
              "PH:en",
              "SG:en",
              "NZ:en",
              "IE:en"
            ],
            "type": "string",
            "description": "Controls the Google News edition to query — determines language, regional sources, and geographically relevant articles. Format: COUNTRY_CODE:language_code (e.g. US:en, GB:en, DE:de, FR:fr, JP:ja). Defaults to US:en (US English). Use this to monitor non-English news or regional publications.",
            "default": "US:en"
          },
          "timeRange": {
            "title": "Time Range (Published Within)",
            "enum": [
              "any",
              "1h",
              "1d",
              "7d",
              "30d",
              "1y"
            ],
            "type": "string",
            "description": "Filter articles by how recently they were published. Use '1h' for breaking news, '1d' for daily monitoring, '7d' for weekly digests. Defaults to 'any' (no time filter — returns all available articles).",
            "default": "any"
          },
          "extractFullText": {
            "title": "Extract Full Article Text",
            "type": "boolean",
            "description": "When enabled, the actor visits each article page and extracts the full body text. Produces a full_text field and word_count field on each record. Ideal for AI/LLM pipelines, RAG (Retrieval-Augmented Generation), sentiment analysis, and NLP workloads. Requires source_url to be resolvable — increases run time and cost (additional article-full-text charge applies per article with text extracted).",
            "default": false
          },
          "decodeUrls": {
            "title": "Decode Real Article URLs",
            "type": "boolean",
            "description": "When enabled, attempts to resolve the real article URL (source_url) by following Google News redirect links. Note: Google News uses JavaScript-based URL encoding that cannot be fully resolved via HTTP redirects alone — source_url may still be null for some articles. Increases run time. Disable for faster metadata-only extraction.",
            "default": false
          },
          "deduplicateResults": {
            "title": "Deduplicate Results",
            "type": "boolean",
            "description": "When enabled (default), removes duplicate articles across queries and topics based on URL. Prevents the same article from appearing multiple times when it matches several search queries or topics simultaneously. Disable only if you need to know which queries each article appeared in (the search_query field tracks this).",
            "default": true
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "number",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "number",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}