{
  "openapi": "3.0.1",
  "info": {
    "title": "Generic Articles Main Content Extractor",
    "description": "Extract the main content of articles. Input can be article links or pages from which to identify and extract article links. Articles are scraped and cleaned to extract the main text and a range of useful metadata. Search terms and date post-filters can be applied, and highlighted snippets can be produced.",
    "version": "0.0",
    "x-build-id": "faS19vdVtXgKAGUrO"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/nlp_data_lni~generic-articles-content-extractor/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-nlp_data_lni-generic-articles-content-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/nlp_data_lni~generic-articles-content-extractor/runs": {
      "post": {
        "operationId": "runs-sync-nlp_data_lni-generic-articles-content-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/nlp_data_lni~generic-articles-content-extractor/run-sync": {
      "post": {
        "operationId": "run-sync-nlp_data_lni-generic-articles-content-extractor",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "article_urls": {
            "title": "Article URLs",
            "type": "array",
            "description": "A list of article urls to process.",
            "items": {
              "type": "string"
            }
          },
          "article_links_urls": {
            "title": "Article Links URLs",
            "type": "array",
            "description": "A list of pages containing article links. Each entry is an object with the fields: 'url' (required): page url; 'external' (defaults to 'false'): allow extra domain links; 'auto' (defaults to 'true'): enable automatic links extraction mode; 'include': matching patterns for valid links; 'exclude': matching patterns for invalid links.\nExample of valid input:\n [{ 'url': 'https://www.forbes.com/business/', 'auto': false, 'external': false, 'include': '/sites/', 'exclude': '' }]"
          },
          "max_links_by_url": {
            "title": "Max links per article links url",
            "type": "integer",
            "description": "The maximum number of links to extract from each article links url provided in 'article_links_urls' input.",
            "default": 10
          },
          "markdown_text": {
            "title": "Get Markdown Text",
            "type": "boolean",
            "description": "Enable Markdown text extraction",
            "default": false
          },
          "terms": {
            "title": "Search Terms Post-Filter",
            "type": "array",
            "description": "A list of terms to look for in each article's content. When a term is found, a specific column is filled in the result table.",
            "items": {
              "type": "string"
            }
          },
          "max_hl_snippets": {
            "title": "Maximum highlighted snippets by search term",
            "type": "integer",
            "description": "Maximum number of highlighted snippets to output per search term. If set to 0, highlighting is disabled.",
            "default": 3
          },
          "date_from": {
            "title": "From Date Post-Filter",
            "pattern": "^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12]\\d|3[01])$|^(\\d+)\\s*(day|week|month|year)s?$|^$",
            "type": "string",
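            "description": "Check if an article has been published since a given date. Provide either an absolute date in the format YYYY-MM-DD or a relative period as {number} {unit}, where unit is day, week, month or year (e.g. '7 days'). Leave empty to disable this filter."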
          },
          "or_post_filters": {
            "title": "Enable 'OR' combination of Post-Filters to set if an article is valid. The default is an 'AND' combination. When no post-filter is applied, all collected articles are valid",
            "type": "boolean",
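            "description": "When enabled, post-filters are combined with 'OR' to decide whether an article is valid. The default is an 'AND' combination. When no post-filter is applied, all collected articles are valid.",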
            "default": false
          },
          "compute_stats": {
            "title": "Compute Statistics",
            "type": "boolean",
            "description": "Enable global statistics computation.",
            "default": false
          },
          "only_valid_stats": {
            "title": "Compute statistics based only on valid articles",
            "type": "boolean",
            "description": "When enabled, statistics are only computed based on valid articles.",
            "default": false
          },
          "proxy_settings": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Select proxies to be used for crawling."
          },
          "max_retries": {
            "title": "Maximum number of retries on request errors",
            "type": "integer",
            "description": "The maximum number of times a request will be retried on network, proxy or server errors.",
            "default": 3
          },
          "timeout": {
            "title": "Request timeout in seconds",
            "type": "integer",
            "description": "Timeout in seconds for making a request.",
            "default": 30
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}