{
  "openapi": "3.0.1",
  "info": {
    "title": "Data Bridge",
    "description": "Turn messy data into clean records for HubSpot, Salesforce, Airtable, SQL, Google Sheets, or any custom schema. Just point it at your data and pick a target format. AI figures out which fields go where, normalizes emails or phone numbers, parses dates, removes duplicates, and validates the output.",
    "version": "0.0",
    "x-build-id": "BsOKT2wBxu3oFPjaX"
  },
  "servers": [
    {
      "url": "https://api.apify.com/v2"
    }
  ],
  "paths": {
    "/acts/filip_cicvarek~data-bridge/run-sync-get-dataset-items": {
      "post": {
        "operationId": "run-sync-get-dataset-items-filip_cicvarek-data-bridge",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for its completion, and returns Actor's dataset items in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    },
    "/acts/filip_cicvarek~data-bridge/runs": {
      "post": {
        "operationId": "runs-sync-filip_cicvarek-data-bridge",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor and returns information about the initiated run in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/runsResponseSchema"
                }
              }
            }
          }
        }
      }
    },
    "/acts/filip_cicvarek~data-bridge/run-sync": {
      "post": {
        "operationId": "run-sync-filip_cicvarek-data-bridge",
        "x-openai-isConsequential": false,
        "summary": "Executes an Actor, waits for completion, and returns the OUTPUT from Key-value store in response.",
        "tags": [
          "Run Actor"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/inputSchema"
              }
            }
          }
        },
        "parameters": [
          {
            "name": "token",
            "in": "query",
            "required": true,
            "schema": {
              "type": "string"
            },
            "description": "Enter your Apify token here"
          }
        ],
        "responses": {
          "200": {
            "description": "OK"
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "inputSchema": {
        "type": "object",
        "required": [
          "sourceType",
          "targetSchemaType"
        ],
        "properties": {
          "sourceType": {
            "title": "Where is your data?",
            "enum": [
              "dataset",
              "url",
              "raw"
            ],
            "type": "string",
            "description": "Choose where to load the source data from. Pick 'Apify Dataset' if your data comes from another Actor run, 'URL' if you have a file hosted online, or 'Paste JSON' to enter data directly.",
            "default": "dataset"
          },
          "datasetIds": {
            "title": "Dataset IDs",
            "type": "array",
            "description": "Paste one or more Dataset IDs from previous Actor runs. You can find the Dataset ID in the Actor run's Storage tab. If you provide multiple IDs, all records are combined into one dataset before transformation.",
            "items": {
              "type": "string"
            }
          },
          "sourceUrl": {
            "title": "File URL",
            "type": "string",
            "description": "Full URL to a JSON, JSONL, or CSV file."
          },
          "rawData": {
            "title": "Paste your JSON data",
            "type": "string",
            "description": "Paste a JSON array of objects here."
          },
          "targetSchemaType": {
            "title": "What format do you need the output in?",
            "enum": [
              "preset",
              "example",
              "manual"
            ],
            "type": "string",
            "description": "Choose 'Platform Preset' to use a ready-made schema for popular tools like HubSpot or Salesforce. Choose 'Example Record' to paste one sample of what you want the output to look like. Choose 'Manual' if you want to define every field yourself.",
            "default": "preset"
          },
          "preset": {
            "title": "Platform preset",
            "enum": [
              "hubspot-contact",
              "salesforce-lead",
              "airtable-row",
              "sql-insert",
              "google-sheets-row",
              "custom-json"
            ],
            "type": "string",
            "description": "Pick your destination platform. The Actor will use that platform's standard field names so you can import directly. Choose 'Custom JSON' if you just want to clean/deduplicate your data without changing the field names.",
            "default": "hubspot-contact"
          },
          "targetExample": {
            "title": "Example output record",
            "type": "object",
            "description": "Paste one record that looks exactly like you want the output to look. The Actor will infer the field names and types from this example, then map your source data to match it."
          },
          "targetSchema": {
            "title": "Manual field definitions",
            "type": "object",
            "description": "Define each output field with its name, type, and whether it's required. See the example for the expected format."
          },
          "openaiApiKey": {
            "title": "OpenAI API Key",
            "type": "string",
            "description": "The Actor uses AI to automatically figure out how your source fields map to the target fields (e.g., it knows that 'company_name' should go into 'Company'). Paste your OpenAI API key here to enable this. You can get one at platform.openai.com/api-keys. Costs ~$0.001 per run. If you don't provide a key, you must map all fields manually in the section below."
          },
          "llmModel": {
            "title": "AI model",
            "enum": [
              "gpt-4o-mini",
              "gpt-4o",
              "gpt-4.1-mini",
              "gpt-4.1-nano"
            ],
            "type": "string",
            "description": "GPT-4o Mini works great for most cases and costs ~$0.001 per run. Use GPT-4o if your schemas are complex or field names are ambiguous.",
            "default": "gpt-4o-mini"
          },
          "fieldMappings": {
            "title": "Manual field mapping",
            "type": "object",
            "description": "Tell the Actor exactly which source fields map to which target fields. The left side (key) is your source field name, the right side (value) is the target field name it should map to. You can combine this with AI mapping -- your manual mappings always take priority over AI suggestions."
          },
          "normalizeEmails": {
            "title": "Lowercase all emails",
            "type": "boolean",
            "description": "Converts 'JOHN@EXAMPLE.COM' to 'john@example.com' and removes extra whitespace. Applied to all fields that map to an email-type target field.",
            "default": true
          },
          "formatPhones": {
            "title": "Standardize phone numbers",
            "type": "boolean",
            "description": "Converts phone numbers like '(555) 123-4567' or '555.123.4567' into a consistent format. Choose the format below.",
            "default": true
          },
          "normalizeDates": {
            "title": "Standardize dates",
            "type": "boolean",
            "description": "Parses dates in any format ('Jan 15, 2026', '15/01/2026', '2026-01-15') and converts them to a consistent format. Choose the format below.",
            "default": true
          },
          "trimAllWhitespace": {
            "title": "Clean up whitespace",
            "type": "boolean",
            "description": "Removes leading/trailing spaces and fixes double spaces in all text fields. Turns '  John   Doe  ' into 'John Doe'.",
            "default": true
          },
          "phoneFormat": {
            "title": "Phone number format",
            "enum": [
              "E164",
              "NATIONAL",
              "INTERNATIONAL",
              "RAW"
            ],
            "type": "string",
            "description": "How should phone numbers look in the output?",
            "default": "E164"
          },
          "defaultCountryCode": {
            "title": "Default country for phone numbers",
            "type": "string",
            "description": "When a phone number doesn't include a country code (e.g., '5551234567'), which country should be assumed? Use a 2-letter code: US, GB, DE, FR, etc.",
            "default": "US"
          },
          "dateFormat": {
            "title": "Date format",
            "enum": [
              "ISO8601",
              "%Y-%m-%d",
              "%m/%d/%Y",
              "%d/%m/%Y",
              "%d.%m.%Y",
              "%B %d, %Y",
              "UNIX_TIMESTAMP"
            ],
            "type": "string",
            "description": "How should dates look in the output?",
            "default": "ISO8601"
          },
          "deduplicationKeys": {
            "title": "Remove duplicates based on",
            "type": "array",
            "description": "Enter one or more target field names. Records with identical values in ALL listed fields are considered duplicates -- only the first occurrence is kept. For example, enter 'email' to remove rows with the same email address. Leave empty to keep all records.",
            "default": [],
            "items": {
              "type": "string"
            }
          },
          "strictMode": {
            "title": "Drop invalid records",
            "type": "boolean",
            "description": "When enabled, records that fail validation (missing required fields, wrong types) are removed from the output. When disabled (default), invalid records stay in the output but are marked with _bridgeStatus: 'error' so you can filter them later.",
            "default": false
          },
          "validationRules": {
            "title": "Custom validation rules",
            "type": "object",
            "description": "Add extra constraints beyond the target schema defaults. Keys are target field names, values are constraint objects. Supported constraints: minLength, maxLength, pattern (regex), min, max, enum (list of allowed values)."
          },
          "transformationRules": {
            "title": "Custom transformation rules",
            "type": "array",
            "description": "For advanced users who need full control over individual field transformations. Each rule specifies a source field, target field, transform function, and optional parameters. These override both AI and manual field mappings for the specified target field.",
            "default": []
          },
          "maxRows": {
            "title": "Max rows to process",
            "minimum": 0,
            "type": "integer",
            "description": "Process only the first N rows. Useful for testing with a small sample before running on the full dataset. Set to 0 to process everything.",
            "default": 0
          },
          "batchSize": {
            "title": "Batch size",
            "minimum": 10,
            "maximum": 10000,
            "type": "integer",
            "description": "How many records to process at a time. The default of 100 works well for most cases. Increase for faster processing on large datasets, decrease if you run into memory issues.",
            "default": 100
          }
        }
      },
      "runsResponseSchema": {
        "type": "object",
        "properties": {
          "data": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "actId": {
                "type": "string"
              },
              "userId": {
                "type": "string"
              },
              "startedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "finishedAt": {
                "type": "string",
                "format": "date-time",
                "example": "2025-01-08T00:00:00.000Z"
              },
              "status": {
                "type": "string",
                "example": "READY"
              },
              "meta": {
                "type": "object",
                "properties": {
                  "origin": {
                    "type": "string",
                    "example": "API"
                  },
                  "userAgent": {
                    "type": "string"
                  }
                }
              },
              "stats": {
                "type": "object",
                "properties": {
                  "inputBodyLen": {
                    "type": "integer",
                    "example": 2000
                  },
                  "rebootCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "restartCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "resurrectCount": {
                    "type": "integer",
                    "example": 0
                  },
                  "computeUnits": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "options": {
                "type": "object",
                "properties": {
                  "build": {
                    "type": "string",
                    "example": "latest"
                  },
                  "timeoutSecs": {
                    "type": "integer",
                    "example": 300
                  },
                  "memoryMbytes": {
                    "type": "integer",
                    "example": 1024
                  },
                  "diskMbytes": {
                    "type": "integer",
                    "example": 2048
                  }
                }
              },
              "buildId": {
                "type": "string"
              },
              "defaultKeyValueStoreId": {
                "type": "string"
              },
              "defaultDatasetId": {
                "type": "string"
              },
              "defaultRequestQueueId": {
                "type": "string"
              },
              "buildNumber": {
                "type": "string",
                "example": "1.0.0"
              },
              "containerUrl": {
                "type": "string"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "integer",
                    "example": 1
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              },
              "usageTotalUsd": {
                "type": "number",
                "example": 0.00005
              },
              "usageUsd": {
                "type": "object",
                "properties": {
                  "ACTOR_COMPUTE_UNITS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATASET_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "KEY_VALUE_STORE_WRITES": {
                    "type": "number",
                    "example": 0.00005
                  },
                  "KEY_VALUE_STORE_LISTS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_READS": {
                    "type": "integer",
                    "example": 0
                  },
                  "REQUEST_QUEUE_WRITES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_INTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "DATA_TRANSFER_EXTERNAL_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_RESIDENTIAL_TRANSFER_GBYTES": {
                    "type": "integer",
                    "example": 0
                  },
                  "PROXY_SERPS": {
                    "type": "integer",
                    "example": 0
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}