{
  "openapi": "3.0.1",
  "info": {
    "title": "PDF to HTML Converter",
    "description": "Convert PDFs to clean HTML preserving formatting, headings, tables, and layout. Multi-page support with per-page or combined output. OCR fallback for image PDFs. Inline CSS styling. Download via API.",
    "version": "1.0",
    "x-build-id": "Tqgjp7BhTgKiV0z6Z"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/junipr~pdf-to-html/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-junipr-pdf-to-html",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the Actor's dataset items in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/junipr~pdf-to-html/runs": {
      "post": {
        "operationId": "runs-sync-junipr-pdf-to-html",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/junipr~pdf-to-html/run-sync": {
      "post": {
        "operationId": "run-sync-junipr-pdf-to-html",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns the OUTPUT record from the Actor's key-value store in the response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "sources": {
            "title": "PDF Sources",
            "type": "array",
            "description": "List of PDF sources to convert. Each source is an object with either a 'url' field (HTTP/HTTPS URL to a PDF) or 'kvStoreKey' + 'kvStoreId' fields (to load from an Apify Key-Value Store).",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string",
                  "description": "HTTP/HTTPS URL to a PDF."
                },
                "kvStoreKey": {
                  "type": "string",
                  "description": "Key of the PDF record in an Apify Key-Value Store. Used together with 'kvStoreId'."
                },
                "kvStoreId": {
                  "type": "string",
                  "description": "ID of the Apify Key-Value Store that holds the record named by 'kvStoreKey'."
                }
              }
            },
            "default": [
              {
                "url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
              }
            ]
          },
          "maxPdfs": {
            "title": "Max PDFs",
            "minimum": 1,
            "maximum": 5000,
            "type": "integer",
            "description": "Maximum number of PDFs to process per run. Use this to limit costs on large batches.",
            "default": 50
          },
          "detectHeadings": {
            "title": "Detect Headings",
            "type": "boolean",
            "description": "Analyze font sizes and weights to infer heading levels (H1-H6). Larger, bolder text becomes higher-level headings.",
            "default": true
          },
          "detectLists": {
            "title": "Detect Lists",
            "type": "boolean",
            "description": "Detect bullet points and numbered sequences, converting them to proper <ul> and <ol> HTML elements.",
            "default": true
          },
          "detectTables": {
            "title": "Detect Tables",
            "type": "boolean",
            "description": "Detect tabular data layouts and convert them to semantic <table> elements with <thead> and <tbody>.",
            "default": true
          },
          "extractImages": {
            "title": "Extract Images",
            "type": "boolean",
            "description": "Extract embedded images from the PDF and store them in the Key-Value Store. Images are referenced via <img> tags in the HTML output.",
            "default": true
          },
          "imageFormat": {
            "title": "Image Format",
            "enum": [
              "png",
              "jpeg",
              "webp"
            ],
            "type": "string",
            "description": "Output format for extracted images. PNG for lossless quality, JPEG/WebP for smaller file sizes.",
            "default": "png"
          },
          "imageQuality": {
            "title": "Image Quality",
            "minimum": 1,
            "maximum": 100,
            "type": "integer",
            "description": "Quality level for JPEG and WebP image extraction (1-100). Higher values produce larger but sharper images. Ignored for PNG.",
            "default": 85
          },
          "preserveLinks": {
            "title": "Preserve Links",
            "type": "boolean",
            "description": "Preserve hyperlinks from the PDF as clickable <a> elements in the HTML output.",
            "default": true
          },
          "detectColumns": {
            "title": "Detect Columns",
            "type": "boolean",
            "description": "Detect multi-column layouts and merge content in natural reading order (left-to-right, top-to-bottom).",
            "default": true
          },
          "stylingMode": {
            "title": "Styling Mode",
            "enum": [
              "inline",
              "class",
              "none"
            ],
            "type": "string",
            "description": "CSS strategy for the HTML output. 'class' adds class names with a <style> block, 'inline' adds style attributes directly, 'none' produces pure semantic HTML with no styling.",
            "default": "class"
          },
          "includeDefaultStyles": {
            "title": "Include Default Styles",
            "type": "boolean",
            "description": "Include a built-in stylesheet that makes the HTML output look presentable. Only applies when stylingMode is 'class'.",
            "default": true
          },
          "customCss": {
            "title": "Custom CSS",
            "type": "string",
            "description": "Custom CSS to inject into the output. Appended after default styles. Only applies when stylingMode is 'class'."
          },
          "preserveFontStyles": {
            "title": "Preserve Font Styles",
            "type": "boolean",
            "description": "Preserve bold, italic, and underline formatting from the PDF. When disabled, output is plain semantic HTML.",
            "default": true
          },
          "preserveFontSizes": {
            "title": "Preserve Font Sizes",
            "type": "boolean",
            "description": "Include font-size information in the HTML output. When disabled, headings are determined by relative size only.",
            "default": false
          },
          "preserveColors": {
            "title": "Preserve Colors",
            "type": "boolean",
            "description": "Preserve text and background colors from the PDF in the HTML output.",
            "default": false
          },
          "pageRange": {
            "title": "Page Range",
            "type": "string",
            "description": "Convert only specific pages. Supports ranges and comma-separated values: '1-5', '1,3,5', '1-3,7,9-12'. Leave empty to convert all pages."
          },
          "pageBreakMode": {
            "title": "Page Break Mode",
            "enum": [
              "hr",
              "div",
              "none"
            ],
            "type": "string",
            "description": "How to mark page boundaries in the HTML output. 'hr' inserts <hr> elements, 'div' wraps each page in a <div class=\"page\">, 'none' produces continuous HTML.",
            "default": "hr"
          },
          "wrapInDocument": {
            "title": "Wrap in HTML Document",
            "type": "boolean",
            "description": "Wrap the output in a complete HTML5 document with <!DOCTYPE html>, <html>, <head>, and <body> tags. When disabled, output is an HTML fragment.",
            "default": false
          },
          "includePageNumbers": {
            "title": "Include Page Numbers",
            "type": "boolean",
            "description": "Add page number annotations (e.g., <span class=\"page-number\">Page 1</span>) to the HTML output.",
            "default": false
          },
          "password": {
            "title": "PDF Password",
            "type": "string",
            "description": "Password for opening encrypted PDFs. Applied to all sources unless overridden per-source."
          },
          "requestTimeout": {
            "title": "Request Timeout (ms)",
            "minimum": 5000,
            "maximum": 300000,
            "type": "integer",
            "description": "Timeout in milliseconds for downloading each PDF from a URL.",
            "default": 60000
          },
          "maxFileSizeMb": {
            "title": "Max File Size (MB)",
            "minimum": 1,
            "maximum": 500,
            "type": "integer",
            "description": "Maximum PDF file size in megabytes. PDFs larger than this are skipped with a FILE_TOO_LARGE error.",
            "default": 100
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration",
            "type": "object",
            "description": "Proxy settings for downloading PDFs from URLs. Defaults to Apify datacenter proxies.",
            "default": {
              "useApifyProxy": true
            }
          },
          "httpHeaders": {
            "title": "HTTP Headers",
            "type": "object",
            "description": "Custom HTTP headers sent with PDF download requests.",
            "default": {}
          },
          "maxRetries": {
            "title": "Max Retries",
            "minimum": 0,
            "maximum": 10,
            "type": "integer",
            "description": "Maximum number of retry attempts for failed PDF downloads, with exponential backoff.",
            "default": 3
          },
          "extractMetadata": {
            "title": "Extract Metadata",
            "type": "boolean",
            "description": "Extract PDF metadata (title, author, creation date, etc.) and include in the output.",
            "default": true
          },
          "includeMetaTags": {
            "title": "Include Meta Tags",
            "type": "boolean",
            "description": "When wrapInDocument is enabled, add <meta> tags from PDF metadata (title, author, subject) to the HTML <head>.",
            "default": true
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "number",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "number",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "number",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "number",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}