{
  "openapi": "3.0.1",
  "info": {
    "title": "Website Media Extractor & Scraper",
    "description": "The Advanced Website Media Extractor & Scraper is a comprehensive media extraction tool that supports images, videos, audio, documents, archives, e-books, fonts, apps, and contact information from websites. Features advanced filtering, proxy support, and detailed analytics.",
    "version": "2.0",
    "x-build-id": "kPLX0eK2gz54BTBM4"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/hlymrk~html-web-media-scraper/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-hlymrk-html-web-media-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/hlymrk~html-web-media-scraper/runs": {
      "post": {
        "operationId": "runs-sync-hlymrk-html-web-media-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/hlymrk~html-web-media-scraper/run-sync": {
      "post": {
        "operationId": "run-sync-hlymrk-html-web-media-scraper",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "startUrls"
        ],
        "properties": {
          "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with.",
            "items": {
              "type": "object",
              "required": [
                "url"
              ],
              "properties": {
                "url": {
                  "type": "string",
                  "title": "URL of a web page",
                  "format": "uri"
                }
              }
            }
          },
          "maxRequestsPerCrawl": {
            "title": "Max Requests per Crawl",
            "minimum": 1,
            "maximum": 10000,
            "type": "integer",
            "description": "Maximum number of requests that can be made by this crawler.",
            "default": 100
          },
          "proxyConfiguration": {
            "title": "Proxy Configuration",
            "type": "object",
            "description": "Proxy settings for the crawler",
            "default": {
              "useApifyProxy": true
            }
          },
          "mediaTypes": {
            "title": "Media Types to Extract",
            "type": "array",
            "description": "Select which media types to extract",
            "items": {
              "type": "string",
              "enum": [
                "images",
                "videos",
                "audios",
                "documents",
                "archives",
                "ebooks",
                "fonts",
                "apps",
                "contacts",
                "svg",
                "embed",
                "objects"
              ]
            },
            "default": [
              "images",
              "videos",
              "audios",
              "documents"
            ]
          },
          "imageOptions": {
            "title": "Image Extraction Options",
            "type": "object",
            "description": "Configure image extraction settings",
            "properties": {
              "includeBackgroundImages": {
                "title": "Include Background Images",
                "type": "boolean",
                "description": "Extract images from CSS background-image properties",
                "editor": "checkbox",
                "default": true
              },
              "minImageSize": {
                "title": "Minimum Image Size (pixels)",
                "type": "integer",
                "description": "Skip images smaller than this size (width or height)",
                "editor": "number",
                "default": 50,
                "minimum": 1
              },
              "includeDataUrls": {
                "title": "Include Data URLs",
                "type": "boolean",
                "description": "Include base64 encoded images (data: URLs)",
                "editor": "checkbox",
                "default": false
              }
            },
            "default": {
              "includeBackgroundImages": true,
              "minImageSize": 50,
              "includeDataUrls": false
            }
          },
          "fileFilters": {
            "title": "File Filtering Options",
            "type": "object",
            "description": "Filter files by size, type, and other criteria",
            "properties": {
              "maxFileSize": {
                "title": "Maximum File Size (MB)",
                "type": "number",
                "description": "Skip files larger than this size",
                "editor": "number",
                "default": 100,
                "minimum": 0.1,
                "maximum": 1000
              },
              "allowedExtensions": {
                "title": "Allowed File Extensions",
                "type": "array",
                "description": "Only extract files with these extensions (leave empty for all)",
                "editor": "requestListSources",
                "items": {
                  "type": "object"
                },
                "default": []
              },
              "blockedExtensions": {
                "title": "Blocked File Extensions",
                "type": "array",
                "description": "Skip files with these extensions",
                "editor": "requestListSources",
                "items": {
                  "type": "object"
                },
                "default": []
              }
            },
            "default": {
              "maxFileSize": 100,
              "allowedExtensions": [],
              "blockedExtensions": []
            }
          },
          "contactExtraction": {
            "title": "Contact Information Extraction",
            "type": "object",
            "description": "Configure contact information extraction",
            "properties": {
              "extractContacts": {
                "title": "Extract Contact Information",
                "type": "boolean",
                "description": "Extract emails, phone numbers, and social media profiles",
                "editor": "checkbox",
                "default": true
              },
              "includeEmails": {
                "title": "Include Email Addresses",
                "type": "boolean",
                "description": "Extract email addresses from page content",
                "editor": "checkbox",
                "default": true
              },
              "includePhones": {
                "title": "Include Phone Numbers",
                "type": "boolean",
                "description": "Extract phone numbers from page content",
                "editor": "checkbox",
                "default": true
              },
              "includeSocialMedia": {
                "title": "Include Social Media Profiles",
                "type": "boolean",
                "description": "Extract social media profile links",
                "editor": "checkbox",
                "default": true
              }
            },
            "default": {
              "extractContacts": true,
              "includeEmails": true,
              "includePhones": true,
              "includeSocialMedia": true
            }
          },
          "outputOptions": {
            "title": "Output Configuration",
            "type": "object",
            "description": "Configure output format and content",
            "properties": {
              "includeSummary": {
                "title": "Include Summary Statistics",
                "type": "boolean",
                "description": "Add summary statistics to each result",
                "editor": "checkbox",
                "default": true
              },
              "includeMetadata": {
                "title": "Include File Metadata",
                "type": "boolean",
                "description": "Include additional file metadata (size, type, etc.)",
                "editor": "checkbox",
                "default": true
              },
              "groupByType": {
                "title": "Group Results by Media Type",
                "type": "boolean",
                "description": "Organize results by media type categories",
                "editor": "checkbox",
                "default": true
              }
            },
            "default": {
              "includeSummary": true,
              "includeMetadata": true,
              "groupByType": true
            }
          },
          "crawlingOptions": {
            "title": "Advanced Crawling Options",
            "type": "object",
            "description": "Advanced settings for crawling behavior",
            "properties": {
              "respectRobotsTxt": {
                "title": "Respect robots.txt",
                "type": "boolean",
                "description": "Follow robots.txt directives",
                "editor": "checkbox",
                "default": true
              },
              "userAgent": {
                "title": "Custom User Agent",
                "type": "string",
                "description": "Custom user agent string (leave empty for default)",
                "editor": "textfield",
                "default": ""
              },
              "maxRetries": {
                "title": "Maximum Retries",
                "type": "integer",
                "description": "Maximum number of retries for failed requests",
                "editor": "number",
                "default": 3,
                "minimum": 0,
                "maximum": 10
              }
            },
            "default": {
              "respectRobotsTxt": true,
              "userAgent": "",
              "maxRetries": 3
            }
          },
          "conversionOptions": {
            "title": "Media Conversion Options",
            "type": "object",
            "description": "Convert media formats for better compatibility",
            "properties": {
              "convertSvgToImage": {
                "title": "Convert SVG to Images",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Convert SVG elements to raster images",
                "default": false
              },
              "convertCanvasToImage": {
                "title": "Convert Canvas to Images",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Convert canvas elements to images",
                "default": false
              },
              "imageFormat": {
                "title": "Conversion Image Format",
                "type": "string",
                "description": "Format for converted images",
                "editor": "select",
                "enum": [
                  "png",
                  "jpeg",
                  "webp"
                ],
                "default": "png"
              },
              "imageQuality": {
                "title": "Image Quality (1-100)",
                "type": "integer",
                "description": "Quality for JPEG/WebP conversion",
                "editor": "number",
                "minimum": 1,
                "maximum": 100,
                "default": 90
              },
              "maxConversionWidth": {
                "title": "Max Conversion Width",
                "type": "integer",
                "description": "Maximum width for converted images",
                "editor": "number",
                "default": 2048,
                "minimum": 100
              },
              "maxConversionHeight": {
                "title": "Max Conversion Height",
                "type": "integer",
                "description": "Maximum height for converted images",
                "editor": "number",
                "default": 2048,
                "minimum": 100
              }
            },
            "default": {
              "convertSvgToImage": false,
              "convertCanvasToImage": false,
              "imageFormat": "png",
              "imageQuality": 90,
              "maxConversionWidth": 2048,
              "maxConversionHeight": 2048
            }
          },
          "duplicateDetection": {
            "title": "Duplicate Detection",
            "type": "object",
            "description": "Remove duplicate media items",
            "properties": {
              "enableDuplicateDetection": {
                "title": "Enable Duplicate Detection",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Remove duplicate media items",
                "default": true
              },
              "compareBy": {
                "title": "Compare By",
                "type": "array",
                "description": "Criteria for duplicate detection",
                "editor": "select",
                "items": {
                  "type": "string",
                  "enum": [
                    "src",
                    "alt",
                    "fileSize",
                    "dimensions"
                  ]
                },
                "default": [
                  "src",
                  "dimensions"
                ]
              },
              "similarityThreshold": {
                "title": "Similarity Threshold (0-1)",
                "type": "number",
                "description": "Threshold for considering items as duplicates",
                "editor": "number",
                "minimum": 0,
                "maximum": 1,
                "default": 0.8
              }
            },
            "default": {
              "enableDuplicateDetection": true,
              "compareBy": [
                "src",
                "dimensions"
              ],
              "similarityThreshold": 0.8
            }
          },
          "validationOptions": {
            "title": "Media Validation",
            "type": "object",
            "description": "Validate media files and URLs",
            "properties": {
              "enableValidation": {
                "title": "Enable Media Validation",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Validate media URLs and file properties",
                "default": true
              },
              "checkUrlAccessibility": {
                "title": "Check URL Accessibility",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Verify that media URLs are accessible",
                "default": true
              },
              "validateFileHeaders": {
                "title": "Validate File Headers",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Check MIME types and file headers",
                "default": true
              },
              "validationTimeout": {
                "title": "Validation Timeout (ms)",
                "type": "integer",
                "description": "Timeout for URL validation requests",
                "editor": "number",
                "default": 10000,
                "minimum": 1000,
                "maximum": 30000
              }
            },
            "default": {
              "enableValidation": true,
              "checkUrlAccessibility": true,
              "validateFileHeaders": true,
              "validationTimeout": 10000
            }
          },
          "customSelectors": {
            "title": "Custom CSS Selectors",
            "type": "array",
            "description": "Define custom CSS selectors for media extraction",
            "items": {
              "type": "object",
              "properties": {
                "name": {
                  "title": "Selector Name",
                  "type": "string",
                  "description": "Unique name for this selector",
                  "editor": "textfield"
                },
                "selector": {
                  "title": "CSS Selector",
                  "type": "string",
                  "description": "CSS selector to find elements",
                  "editor": "textarea"
                },
                "mediaType": {
                  "title": "Media Type",
                  "type": "string",
                  "description": "Type of media this selector finds",
                  "editor": "textfield"
                },
                "srcAttribute": {
                  "title": "Source Attribute",
                  "type": "string",
                  "description": "Attribute containing the media URL",
                  "editor": "textfield",
                  "default": "src"
                },
                "altAttribute": {
                  "title": "Alt Text Attribute",
                  "type": "string",
                  "description": "Attribute containing description text",
                  "editor": "textfield",
                  "default": "alt"
                }
              },
              "required": [
                "name",
                "selector",
                "mediaType"
              ]
            },
            "default": []
          },
          "batchProcessing": {
            "title": "Batch Processing Options",
            "type": "object",
            "description": "Configure batch processing for large URL lists",
            "properties": {
              "enableBatchProcessing": {
                "title": "Enable Batch Processing",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Process URLs in batches for better performance and reliability",
                "default": true
              },
              "batchSize": {
                "title": "Batch Size",
                "type": "integer",
                "description": "Number of URLs to process in each batch",
                "editor": "number",
                "default": 10,
                "minimum": 1,
                "maximum": 100
              },
              "concurrency": {
                "title": "Concurrent Requests",
                "type": "integer",
                "description": "Number of URLs to process simultaneously within each batch",
                "editor": "number",
                "default": 3,
                "minimum": 1,
                "maximum": 10
              },
              "delayBetweenBatches": {
                "title": "Delay Between Batches (ms)",
                "type": "integer",
                "description": "Delay between processing batches in milliseconds",
                "editor": "number",
                "default": 1000,
                "minimum": 0,
                "maximum": 10000
              },
              "maxRetries": {
                "title": "Maximum Retries",
                "type": "integer",
                "description": "Maximum number of retries for failed URLs",
                "editor": "number",
                "default": 3,
                "minimum": 0,
                "maximum": 10
              },
              "failureThreshold": {
                "title": "Failure Threshold (0-1)",
                "type": "number",
                "description": "Stop processing if failure rate exceeds this threshold",
                "editor": "number",
                "default": 0.5,
                "minimum": 0,
                "maximum": 1
              },
              "enableProgressTracking": {
                "title": "Enable Progress Tracking",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Track and save processing progress for resumption",
                "default": true
              },
              "resumeFromLastBatch": {
                "title": "Resume from Last Batch",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Resume processing from the last completed batch if interrupted",
                "default": true
              }
            },
            "default": {
              "enableBatchProcessing": true,
              "batchSize": 10,
              "concurrency": 3,
              "delayBetweenBatches": 1000,
              "maxRetries": 3,
              "failureThreshold": 0.5,
              "enableProgressTracking": true,
              "resumeFromLastBatch": true
            }
          },
          "urlListManagement": {
            "title": "URL List Management",
            "type": "object",
            "description": "Configure URL list processing and filtering",
            "properties": {
              "enableDeduplication": {
                "title": "Remove Duplicate URLs",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Remove duplicate URLs from the input list",
                "default": true
              },
              "enableValidation": {
                "title": "Validate URLs",
                "type": "boolean",
                "editor": "checkbox",
                "description": "Validate URL format and accessibility",
                "default": true
              },
              "maxUrlsPerBatch": {
                "title": "Maximum URLs per Batch",
                "type": "integer",
                "description": "Maximum number of URLs to process in a single run",
                "editor": "number",
                "default": 1000,
                "minimum": 1,
                "maximum": 50000
              },
              "blockedDomains": {
                "title": "Blocked Domains",
                "type": "array",
                "description": "Domains to exclude from processing",
                "editor": "requestListSources",
                "items": {
                  "type": "object"
                },
                "default": []
              },
              "allowedDomains": {
                "title": "Allowed Domains",
                "type": "array",
                "description": "Only process URLs from these domains (leave empty for all)",
                "editor": "requestListSources",
                "items": {
                  "type": "object"
                },
                "default": []
              },
              "urlPatterns": {
                "title": "URL Pattern Filters",
                "type": "object",
                "editor": "json",
                "description": "Include/exclude URLs based on patterns",
                "properties": {
                  "includePatterns": {
                    "title": "Include Patterns",
                    "type": "array",
                    "description": "Only process URLs matching these regex patterns",
                    "editor": "json",
                    "items": {
                      "type": "string"
                    },
                    "default": []
                  },
                  "excludePatterns": {
                    "title": "Exclude Patterns",
                    "type": "array",
                    "description": "Skip URLs matching these regex patterns",
                    "editor": "json",
                    "items": {
                      "type": "string"
                    },
                    "default": []
                  }
                },
                "default": {
                  "includePatterns": [],
                  "excludePatterns": []
                }
              }
            },
            "default": {
              "enableDeduplication": true,
              "enableValidation": true,
              "maxUrlsPerBatch": 1000,
              "blockedDomains": [],
              "allowedDomains": [],
              "urlPatterns": {
                "includePatterns": [],
                "excludePatterns": []
              }
            }
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}