{
  "openapi": "3.0.1",
  "info": {
    "title": "Zhihu Scraper — Q&A, Answers, Articles, Columns",
    "description": "Zhihu scraper — extract long-form Mandarin Q&A, expert answers, articles & column posts. Keyword search, question answer threads, article detail, column article list. China market research, LLM training data, competitive intel. Four operations, one clean dataset per run. No API key.",
    "version": "1.1",
    "x-build-id": "dNeXR1MAeIdTEIgRY"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/sian.agency~zhihu-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-sian.agency-zhihu-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/sian.agency~zhihu-scraper/runs": {
      "post": {
        "operationId": "runs-sync-sian.agency-zhihu-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/sian.agency~zhihu-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-sian.agency-zhihu-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "operation"
        ],
        "properties": {
          "operation": {
            "title": "🎯 Operation — what do you want to scrape?",
            "enum": [
              "search",
              "answerList",
              "articleDetail",
              "columnArticleList"
            ],
            "type": "string",
            "description": "🎯 **PICK ONE OPERATION PER RUN.** Each run produces one clean dataset matching the chosen mode.\n\n- **🔍 Search Zhihu** — keyword search across answers, questions, articles, people (~20 mixed results/page)\n- **❓ Question Answers** — paginated answers for a single question, ranked by Zhihu's algorithm (~5 answers/page)\n- **📰 Article Detail** — single Zhihu column article by ID (full content, author, vote/comment counts, topics)\n- **📚 Column Articles** — paginated article list from a Zhihu column (zhuanlan) by slug (~10 articles/page)\n\n💡 **TIP:** To combine operations, run the actor multiple times with different configurations.",
            "default": "search"
          },
          "keyword": {
            "title": "🔍 Search Keyword (for Search Zhihu)",
            "type": "string",
            "description": "🔍 **Required for the `Search Zhihu` operation.**\n\nAny Zhihu search query. Mixed Chinese / English supported:\n- `Python`\n- `人工智能` (artificial intelligence)\n- `投资理财` (investment)\n- `品牌营销` (brand marketing)\n\n💡 **TIP:** Chinese-language queries return native Mandarin results; English queries surface bilingual / cross-cultural threads. Mixed results include answers, questions, articles, and people — filter the dataset by `resultType` to split modes.\n\n⚠️ **Ignored** for the Question Answers, Article Detail, and Column Articles operations."
          },
          "questionId": {
            "title": "❓ Question ID (for Question Answers)",
            "type": "string",
            "description": "❓ **Required for the `Question Answers` operation.**\n\nThe numeric Zhihu question ID. You can find it:\n- In any Zhihu question URL: `https://www.zhihu.com/question/{ID}` → the trailing numeric segment\n- In the `questionId` field of any answer or search result row\n\n💡 **TIP:** To pull a question's full answer thread, start with `maxPages: 5` (~25 answers) and increase as needed. Answers are returned in Zhihu's ranking order — top-voted first by default.\n\n⚠️ **Ignored** for Search, Article Detail, and Column Articles operations."
          },
          "articleId": {
            "title": "📰 Article ID (for Article Detail)",
            "type": "string",
            "description": "📰 **Required for the `Article Detail` operation.**\n\nThe numeric Zhihu column article ID. You can find it:\n- In any Zhuanlan URL: `https://zhuanlan.zhihu.com/p/{ID}` → the trailing numeric segment\n- In the `articleId` field of any search or column-article-list result row\n\n💡 **TIP:** Article Detail returns the full HTML content body, author profile, vote/comment counts, topics, and the parent column reference — ideal for in-depth scraping of a known article.\n\n⚠️ **Ignored** for Search, Question Answers, and Column Articles operations."
          },
          "columnId": {
            "title": "📚 Column ID / Slug (for Column Articles)",
            "type": "string",
            "description": "📚 **Required for the `Column Articles` operation.**\n\nThe Zhihu column (zhuanlan) slug. Found in the column URL: `https://zhuanlan.zhihu.com/{slug}` → the trailing path segment.\n\nExamples:\n- `xuehy` — Xue Hongyan's investment column\n- `qingreading` — popular reading-recommendation column\n- `kaiyuan` — open-source / tech column\n\n💡 **TIP:** Use Column Articles to pull a curator's entire article history in chronological reverse order. Combine with Article Detail to enrich the top-N articles with full content.\n\n⚠️ **Ignored** for Search, Question Answers, and Article Detail operations."
          },
          "maxPages": {
            "title": "📄 Max pages to fetch",
            "minimum": 1,
            "maximum": 50,
            "type": "integer",
            "description": "📄 **Applies to paginated operations** (Search Zhihu, Question Answers, Column Articles). Ignored for Article Detail (single record).\n\n- **Search Zhihu:** ~20 mixed results per page\n- **Question Answers:** ~5 answers per page\n- **Column Articles:** ~10 articles per page\n\n💡 **TIP:** Start small (1–3 pages) to preview results before scaling up.\n\n⚠️ Hard cap: 50 pages to prevent runaway runs.",
            "default": 5
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}