{
  "openapi": "3.0.1",
  "info": {
    "title": "PDF to MP3 - Convert PDF, EPUB, DOCX & Text to Audiobook",
    "description": "Convert PDF, EPUB, DOCX, Markdown, HTML, TXT, and RTF to MP3 audiobooks. Free Microsoft Edge TTS (no API key) with OCR for scanned PDFs, 70+ languages, and optional OpenAI or ElevenLabs voices. ~$0.04/min.",
    "version": "0.0",
    "x-build-id": "9ItByvFGFeXIoxx7l"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/marielise.dev~pdf-to-mp3/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-marielise.dev-pdf-to-mp3",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/marielise.dev~pdf-to-mp3/runs": {
      "post": {
        "operationId": "runs-sync-marielise.dev-pdf-to-mp3",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/marielise.dev~pdf-to-mp3/run-sync": {
      "post": {
        "operationId": "run-sync-marielise.dev-pdf-to-mp3",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "properties": {
          "documentUrl": {
            "title": "Document URL",
            "type": "string",
            "description": "Public URL of the document you want to narrate. Supported formats: PDF (.pdf), Word (.docx), EPUB (.epub), Markdown (.md, .markdown, .mdx), plain text (.txt), HTML (.html, .htm, .xhtml), Rich Text (.rtf). Either documentUrl, documentFile, documentBase64, or text must be provided. NOTE: scanned/image-only PDFs are supported via automatic OCR (see enableOcr). Encrypted/password-protected PDFs are supported via the pdfPassword input. Encrypted DOCX files are not supported."
          },
          "documentFile": {
            "title": "Or upload a file from your device",
            "type": "array",
            "description": "Drag and drop or select a local file. Accepted: .pdf, .docx, .epub, .md, .markdown, .txt, .html, .htm, .rtf. Uploaded to a key-value store and narrated. If multiple are provided, only the first is used."
          },
          "documentBase64": {
            "title": "Or paste Base64-encoded document",
            "type": "string",
            "description": "Alternative input (e.g. for the CLI/API): base64-encoded document bytes. Useful when the document is not publicly hosted and you are not using the upload widget. Format auto-detected from magic bytes / content."
          },
          "text": {
            "title": "Or paste raw text / Markdown",
            "type": "string",
            "description": "Paste prose, Markdown, or HTML directly. Quickest way to narrate a blog post draft, a long ChatGPT reply, or a README. Auto-detected as Markdown if it contains heading or list syntax."
          },
          "model": {
            "title": "TTS Model",
            "enum": [
              "edge-tts",
              "openai-gpt-4o-mini-tts",
              "openai-tts-1",
              "openai-tts-1-hd",
              "elevenlabs-flash-v2_5",
              "elevenlabs-turbo-v2_5"
            ],
            "type": "string",
            "description": "Which text-to-speech engine to use. edge-tts is FREE (Microsoft Edge neural voices, no API key needed, no per-character cost) and the recommended default for long books. All OpenAI and ElevenLabs models require BYOK (Bring Your Own Key): you pay the provider directly using your own API key, in addition to this actor's small per-page fee.",
            "default": "edge-tts"
          },
          "language": {
            "title": "Language",
            "enum": [
              "auto",
              "en",
              "es",
              "fr",
              "de",
              "it",
              "pt",
              "nl",
              "ru",
              "pl",
              "tr",
              "ar",
              "zh",
              "ja",
              "ko",
              "hi",
              "uk",
              "sv",
              "no",
              "da",
              "fi",
              "cs",
              "el",
              "he",
              "th",
              "vi",
              "id",
              "ro",
              "hu"
            ],
            "type": "string",
            "description": "Language of the text. 'Auto-detect' reads the content and picks a matching voice automatically (recommended). Pick a specific language to force it. Only used for Edge TTS, where each voice is locked to one language; OpenAI voices are multilingual and follow the text. Ignored if you set an explicit Voice below.",
            "default": "auto"
          },
          "voice": {
            "title": "Voice (optional, overrides language)",
            "type": "string",
            "description": "Leave blank to auto-pick by language (recommended).\n\n• Edge TTS (free) — Azure ShortName. Popular examples:\n  - English: en-US-AndrewNeural, en-US-AvaNeural, en-GB-SoniaNeural, en-AU-NatashaNeural\n  - Spanish: es-ES-ElviraNeural, es-MX-DaliaNeural\n  - French: fr-FR-DeniseNeural, fr-CA-SylvieNeural\n  - German: de-DE-KatjaNeural\n  - Italian: it-IT-ElsaNeural\n  - Portuguese: pt-BR-FranciscaNeural, pt-PT-RaquelNeural\n  - 400+ voices total covering 70+ languages.\n\n• OpenAI: alloy, echo, fable, onyx, nova, shimmer, coral, sage.\n\n• ElevenLabs: a voice ID (e.g. 21m00Tcm4TlvDq8ikWAM for Rachel)."
          },
          "speed": {
            "title": "Speech Speed",
            "minimum": 0.25,
            "maximum": 4,
            "type": "number",
            "description": "Playback speed multiplier. 1.0 = normal pace. Range 0.25 to 4.0. Only applies to OpenAI tts-1 / tts-1-hd. gpt-4o-mini-tts ignores this (use instructions instead).",
            "default": 1
          },
          "instructions": {
            "title": "Voice Instructions (gpt-4o-mini-tts only)",
            "type": "string",
            "description": "Free-form style guidance for the gpt-4o-mini-tts model, e.g. 'Calm, slow audiobook narrator' or 'Energetic podcast host'. Ignored by other models."
          },
          "pageRange": {
            "title": "Page / Section Range",
            "type": "string",
            "description": "Optional 1-indexed range. For PDFs this is the actual page range. For DOCX / EPUB / TXT / MD / HTML / RTF, the cleaned text is split into ~3000-char pseudo-pages so the same range syntax still works (EPUBs are walked in spine order). Examples: '1-10', '1,3,5', '1-3,7-9'. Leave empty for the full document."
          },
          "enableOcr": {
            "title": "OCR scanned / image-only PDFs",
            "type": "boolean",
            "description": "When a PDF page has no selectable text layer (scanned documents, photographed pages), run it through OCR (Tesseract) to recover the text and narrate it. Only pages that actually need OCR are processed and billed (ocr-page-processed event). Leave on for 'just works' behavior; turn off to fail fast on scans instead.",
            "default": true
          },
          "pdfPassword": {
            "title": "PDF Password (for encrypted PDFs)",
            "type": "string",
            "description": "Password to decrypt a password-protected PDF before extraction. Leave blank for normal PDFs."
          },
          "chunkSize": {
            "title": "Chunk Size (characters)",
            "minimum": 500,
            "maximum": 4096,
            "type": "integer",
            "description": "Characters per TTS request. OpenAI accepts up to ~4096. ElevenLabs caps at 2500 (free / starter plans) - the actor auto-clamps to 2500 for ElevenLabs models. Smaller chunks recover better from errors but cost the same in total.",
            "default": 4000
          },
          "concurrency": {
            "title": "Parallel TTS Requests",
            "minimum": 1,
            "maximum": 20,
            "type": "integer",
            "description": "How many TTS chunks to synthesize in parallel. 5 is a safe default. Auto-clamped per provider (Edge 8, OpenAI 10, ElevenLabs 2) to avoid rate limits. Range: 1-20.",
            "default": 5
          },
          "resume": {
            "title": "Resume from previous run (recommended for books)",
            "type": "boolean",
            "description": "If a previous run for the same document + voice + model failed or timed out, skip already-synthesized chunks and continue. Uses a named key-value store called 'pdf-audio-cache' shared across runs. Disable to force a full re-narration.",
            "default": true
          },
          "skipFailedChunks": {
            "title": "Skip failed chunks instead of aborting",
            "type": "boolean",
            "description": "If a single chunk keeps failing after retries (e.g. malformed text), skip it with a warning and keep narrating the rest of the document. The output status becomes 'partial' and failed chunk indexes are listed. Auth/quota errors always abort regardless, since every chunk would fail. Disable to stop on the first failed chunk.",
            "default": true
          },
          "maxPartMb": {
            "title": "Max size per audio part (MB)",
            "minimum": 1,
            "maximum": 500,
            "type": "integer",
            "description": "Long documents are split into multiple MP3 parts so each stays a manageable, chapter-sized file. When a part reaches this size it is finalized and uploaded, and disk is freed. Lower values = more, smaller files. Range 1-500.",
            "default": 40
          },
          "maxCostUsd": {
            "title": "Max cost ceiling (USD, optional)",
            "minimum": 0.02,
            "type": "number",
            "description": "Hard cap on actor charges. If the pre-flight estimate exceeds this number the run aborts BEFORE any TTS happens (you only pay the actor-start fee plus any OCR already done). The actual audio-minute charge is also clamped to this cap, so the final bill never exceeds it even if the produced audio runs longer than estimated. Must be at least 0.02. Leave blank to disable. Caps the actor's PPE charges only - your OpenAI / ElevenLabs provider costs (BYOK) are separate and not bounded by this."
          },
          "openaiApiKey": {
            "title": "OpenAI API Key (REQUIRED for OpenAI models)",
            "type": "string",
            "description": "Your own OpenAI API key. REQUIRED whenever you pick an openai-* model (tts-1, tts-1-hd, gpt-4o-mini-tts). You pay OpenAI directly for the TTS calls and we charge a small per-page actor fee on top. Not needed for Edge TTS (free) or ElevenLabs models. Get a key at https://platform.openai.com/api-keys"
          },
          "elevenlabsApiKey": {
            "title": "ElevenLabs API Key (REQUIRED for ElevenLabs models)",
            "type": "string",
            "description": "Your ElevenLabs API key. REQUIRED whenever you pick an elevenlabs-* model. You pay ElevenLabs directly for the audio generation and we charge a small per-page actor fee on top. Get a key at https://elevenlabs.io/app/settings/api-keys"
          },
          "proxyConfiguration": {
            "title": "Proxy (for Document URL fetch)",
            "type": "object",
            "description": "Optional proxy used only when fetching from a Document URL. Helps with hosts that block datacenter IPs. Ignored for uploaded files, base64, and raw text.",
            "default": {
              "useApifyProxy": false
            }
          },
          "debug": {
            "title": "Debug Mode",
            "type": "boolean",
            "description": "Enable verbose logging for troubleshooting.",
            "default": false
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}