diff --git a/airbyte_cdk/cli/airbyte_cdk/__init__.py b/airbyte_cdk/cli/airbyte_cdk/__init__.py index b4365fe10..898dc1d27 100644 --- a/airbyte_cdk/cli/airbyte_cdk/__init__.py +++ b/airbyte_cdk/cli/airbyte_cdk/__init__.py @@ -44,6 +44,7 @@ from airbyte_cdk.cli.airbyte_cdk._connector import connector_cli_group from airbyte_cdk.cli.airbyte_cdk._image import image_cli_group from airbyte_cdk.cli.airbyte_cdk._manifest import manifest_cli_group +from airbyte_cdk.cli.airbyte_cdk._metadata import metadata_cli_group from airbyte_cdk.cli.airbyte_cdk._secrets import secrets_cli_group from airbyte_cdk.cli.airbyte_cdk._version import print_version @@ -78,6 +79,7 @@ def cli( cli.add_command(connector_cli_group) cli.add_command(manifest_cli_group) +cli.add_command(metadata_cli_group) cli.add_command(image_cli_group) cli.add_command(secrets_cli_group) diff --git a/airbyte_cdk/cli/airbyte_cdk/_metadata.py b/airbyte_cdk/cli/airbyte_cdk/_metadata.py new file mode 100644 index 000000000..d5c4d9151 --- /dev/null +++ b/airbyte_cdk/cli/airbyte_cdk/_metadata.py @@ -0,0 +1,75 @@ +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
"""CLI commands for metadata validation."""

import json
import sys
from pathlib import Path
from typing import Any

import rich_click as click

from airbyte_cdk.models.connector_metadata import validate_metadata_file


@click.group(name="metadata")
def metadata_cli_group() -> None:
    """Commands for working with connector metadata."""


def _format_error_line(error: dict[str, Any]) -> str:
    """Render one validation error dict as a bullet line for text output.

    Reads the optional "type", "path" and "message" keys; the path prefix is
    omitted when the error carries no (or an empty) path.
    """
    error_type = error.get("type", "unknown")
    path = error.get("path", "")
    message = error.get("message", "")

    if path:
        return f"  • {path}: {message} (type: {error_type})"
    return f"  • {message} (type: {error_type})"


@metadata_cli_group.command(name="validate")
@click.option(
    "--file",
    "-f",
    "file_path",
    # dir_okay=False: a directory would otherwise pass option validation and only
    # fail later, when the validator tries to read it as a file.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    required=True,
    help="Path to the metadata.yaml file to validate",
)
@click.option(
    "--schema",
    "-s",
    "schema_source",
    type=str,
    default=None,
    help="URL or file path to JSON schema (defaults to monorepo schema)",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "text"]),
    default="text",
    help="Output format (json or text)",
)
def validate_command(file_path: Path, schema_source: str | None, output_format: str) -> None:
    """Validate a connector metadata.yaml file.

    This command validates a metadata.yaml file against the connector metadata schema
    and reports any validation errors.

    Examples:
        airbyte-cdk metadata validate --file metadata.yaml
        airbyte-cdk metadata validate --file metadata.yaml --format json
        airbyte-cdk metadata validate --file metadata.yaml --schema /path/to/schema.json
    """
    # NOTE: the docstring above is rendered as the command's --help text, so it is
    # kept user-facing; implementation notes live in comments instead.
    result = validate_metadata_file(file_path, schema_source)

    if output_format == "json":
        # Machine-readable mode: dump the whole result model, valid or not.
        click.echo(result.model_dump_json(indent=2))
    elif result.valid:
        click.secho("✓ Metadata file is valid", fg="green")
    else:
        click.secho("✗ Metadata file is invalid", fg="red")
        click.echo()
        click.echo("Errors:")
        for error in result.errors:
            click.echo(_format_error_line(error))

    # The process exit status mirrors the validation outcome in both output
    # formats: 0 when the file is valid, 1 otherwise.
    sys.exit(0 if result.valid else 1)
of suggested streams for the connector", - ) - - -class ConnectorMetadata(BaseModel): - """Connector metadata from metadata.yaml.""" - - model_config = {"extra": "allow"} - - dockerRepository: str = Field(..., description="Docker repository for the connector image") - dockerImageTag: str = Field(..., description="Docker image tag for the connector") - - tags: list[str] = Field( - default=[], - description="List of tags for the connector", - ) - - suggestedStreams: SuggestedStreams | None = Field( - default=None, - description="Suggested streams for the connector", - ) - - @property - def language(self) -> ConnectorLanguage: - """Get the connector language.""" - for tag in self.tags: - if tag.startswith("language:"): - language = tag.split(":", 1)[1] - if language == "python": - return ConnectorLanguage.PYTHON - elif language == "java": - return ConnectorLanguage.JAVA - elif language == "low-code": - return ConnectorLanguage.LOW_CODE - elif language == "manifest-only": - return ConnectorLanguage.MANIFEST_ONLY - - return ConnectorLanguage.UNKNOWN - - connectorBuildOptions: ConnectorBuildOptions | None = Field( - None, description="Options for building the connector" - ) - - -class MetadataFile(BaseModel): - """Represents the structure of a metadata.yaml file.""" - - model_config = {"extra": "allow"} - - data: ConnectorMetadata = Field(..., description="Connector metadata") - - @classmethod - def from_file( - cls, - file_path: Path, - ) -> MetadataFile: - """Load metadata from a YAML file.""" - if not file_path.exists(): - raise FileNotFoundError(f"Metadata file not found: {file_path!s}") - - metadata_content = file_path.read_text() - metadata_dict = yaml.safe_load(metadata_content) - - if not metadata_dict or "data" not in metadata_dict: - raise ValueError( - "Invalid metadata format: missing 'data' field in YAML file '{file_path!s}'" - ) - - metadata_file = MetadataFile.model_validate(metadata_dict) - return metadata_file diff --git 
a/airbyte_cdk/models/connector_metadata/__init__.py b/airbyte_cdk/models/connector_metadata/__init__.py new file mode 100644 index 000000000..2e08ecf0b --- /dev/null +++ b/airbyte_cdk/models/connector_metadata/__init__.py @@ -0,0 +1,23 @@ +"""Connector metadata models and validation.""" + +from airbyte_cdk.models.connector_metadata.metadata_file import ( + ConnectorBuildOptions, + ConnectorLanguage, + ConnectorMetadata, + MetadataFile, + SuggestedStreams, + ValidationResult, + get_metadata_schema, + validate_metadata_file, +) + +__all__ = [ + "ConnectorBuildOptions", + "ConnectorLanguage", + "ConnectorMetadata", + "MetadataFile", + "SuggestedStreams", + "ValidationResult", + "get_metadata_schema", + "validate_metadata_file", +] diff --git a/airbyte_cdk/models/connector_metadata/metadata_file.py b/airbyte_cdk/models/connector_metadata/metadata_file.py new file mode 100644 index 000000000..809f5aa19 --- /dev/null +++ b/airbyte_cdk/models/connector_metadata/metadata_file.py @@ -0,0 +1,248 @@ +"""Models to represent the structure of a `metadata.yaml` file.""" + +from __future__ import annotations + +import json +from enum import Enum +from pathlib import Path +from typing import Any, cast +from urllib.request import urlopen + +import jsonschema +import yaml +from pydantic import BaseModel, Field, ValidationError + +# TODO: Update to master branch URL after associated PR merges +# https://raw.githubusercontent.com/airbytehq/airbyte/master/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorMetadataDefinitionV0.json +DEFAULT_SCHEMA_URL = "https://raw.githubusercontent.com/airbytehq/airbyte/61048d88732df93c50bd3da490de8d3cc1aa66b0/airbyte-ci/connectors/metadata_service/lib/metadata_service/models/generated/ConnectorMetadataDefinitionV0.json" + + +class ConnectorLanguage(str, Enum): + """Connector implementation language.""" + + PYTHON = "python" + JAVA = "java" + LOW_CODE = "low-code" + MANIFEST_ONLY = "manifest-only" + UNKNOWN = 
"unknown" + + +class ConnectorBuildOptions(BaseModel): + """Connector build options from metadata.yaml.""" + + model_config = {"extra": "allow"} + + baseImage: str | None = Field( + None, + description="Base image to use for building the connector", + ) + path: str | None = Field( + None, + description="Path to the connector code within the repository", + ) + + +class SuggestedStreams(BaseModel): + """Suggested streams from metadata.yaml.""" + + streams: list[str] = Field( + default=[], + description="List of suggested streams for the connector", + ) + + +class ConnectorMetadata(BaseModel): + """Connector metadata from metadata.yaml.""" + + model_config = {"extra": "allow"} + + dockerRepository: str = Field(..., description="Docker repository for the connector image") + dockerImageTag: str = Field(..., description="Docker image tag for the connector") + + tags: list[str] = Field( + default=[], + description="List of tags for the connector", + ) + + suggestedStreams: SuggestedStreams | None = Field( + default=None, + description="Suggested streams for the connector", + ) + + @property + def language(self) -> ConnectorLanguage: + """Get the connector language.""" + for tag in self.tags: + if tag.startswith("language:"): + language = tag.split(":", 1)[1] + if language == "python": + return ConnectorLanguage.PYTHON + elif language == "java": + return ConnectorLanguage.JAVA + elif language == "low-code": + return ConnectorLanguage.LOW_CODE + elif language == "manifest-only": + return ConnectorLanguage.MANIFEST_ONLY + + return ConnectorLanguage.UNKNOWN + + connectorBuildOptions: ConnectorBuildOptions | None = Field( + None, description="Options for building the connector" + ) + + +class MetadataFile(BaseModel): + """Represents the structure of a metadata.yaml file.""" + + model_config = {"extra": "allow"} + + data: ConnectorMetadata = Field(..., description="Connector metadata") + + @classmethod + def from_file( + cls, + file_path: Path, + ) -> MetadataFile: + """Load 
metadata from a YAML file.""" + if not file_path.exists(): + raise FileNotFoundError(f"Metadata file not found: {file_path!s}") + + metadata_content = file_path.read_text() + metadata_dict = yaml.safe_load(metadata_content) + + if not metadata_dict or "data" not in metadata_dict: + raise ValueError( + "Invalid metadata format: missing 'data' field in YAML file '{file_path!s}'" + ) + + metadata_file = MetadataFile.model_validate(metadata_dict) + return metadata_file + + +class ValidationResult(BaseModel): + """Result of metadata validation.""" + + valid: bool = Field(..., description="Whether the metadata is valid") + errors: list[dict[str, Any]] = Field( + default_factory=list, description="List of validation errors" + ) + metadata: dict[str, Any] | None = Field(None, description="Parsed metadata if available") + + +def get_metadata_schema(schema_source: str | Path | None = None) -> dict[str, Any]: + """Load metadata JSON schema from URL or file path. + + Args: + schema_source: URL or file path to JSON schema. If None, uses DEFAULT_SCHEMA_URL. + + Returns: + Parsed JSON schema as dictionary + """ + if schema_source is None: + schema_source = DEFAULT_SCHEMA_URL + + if isinstance(schema_source, Path) or ( + isinstance(schema_source, str) and not schema_source.startswith(("http://", "https://")) + ): + schema_path = Path(schema_source) + if not schema_path.exists(): + raise FileNotFoundError(f"Schema file not found: {schema_path}") + return cast(dict[str, Any], json.loads(schema_path.read_text())) + + try: + with urlopen(schema_source, timeout=10) as response: + return cast(dict[str, Any], json.loads(response.read().decode("utf-8"))) + except Exception as e: + raise RuntimeError(f"Failed to fetch schema from {schema_source}: {e}") from e + + +def validate_metadata_file( + file_path: Path, schema_source: str | Path | None = None +) -> ValidationResult: + """Validate a metadata.yaml file against JSON schema. 
+ + Args: + file_path: Path to the metadata.yaml file to validate + schema_source: URL or file path to JSON schema. If None, uses DEFAULT_SCHEMA_URL. + + Returns: + ValidationResult with validation status, errors, and parsed metadata + """ + errors = [] + metadata_dict = None + + try: + if not file_path.exists(): + return ValidationResult( + valid=False, + errors=[ + {"type": "file_not_found", "message": f"Metadata file not found: {file_path}"} + ], + metadata=None, + ) + + try: + metadata_content = file_path.read_text() + metadata_dict = yaml.safe_load(metadata_content) + except yaml.YAMLError as e: + return ValidationResult( + valid=False, + errors=[{"type": "yaml_parse_error", "message": f"Failed to parse YAML: {e}"}], + metadata=None, + ) + + if not metadata_dict or "data" not in metadata_dict: + return ValidationResult( + valid=False, + errors=[ + { + "type": "missing_field", + "path": "data", + "message": "Missing 'data' field in metadata", + } + ], + metadata=metadata_dict, + ) + + try: + schema = get_metadata_schema(schema_source) + except Exception as e: + return ValidationResult( + valid=False, + errors=[{"type": "schema_load_error", "message": f"Failed to load schema: {e}"}], + metadata=metadata_dict, + ) + + try: + jsonschema.validate(instance=metadata_dict, schema=schema) + return ValidationResult( + valid=True, + errors=[], + metadata=metadata_dict, + ) + except jsonschema.ValidationError as e: + errors.append( + { + "type": "validation_error", + "path": ".".join(str(p) for p in e.absolute_path) if e.absolute_path else "", + "message": e.message, + } + ) + return ValidationResult( + valid=False, + errors=errors, + metadata=metadata_dict, + ) + except jsonschema.SchemaError as e: + return ValidationResult( + valid=False, + errors=[{"type": "schema_error", "message": f"Invalid schema: {e.message}"}], + metadata=metadata_dict, + ) + + except Exception as e: + return ValidationResult( + valid=False, + errors=[{"type": "unexpected_error", "message": 
f"Unexpected error: {e}"}], + metadata=metadata_dict, + ) diff --git a/airbyte_cdk/test/models/connector_metadata/generated/metadata_schema.json b/airbyte_cdk/test/models/connector_metadata/generated/metadata_schema.json index aef8ce028..9187c310f 100644 --- a/airbyte_cdk/test/models/connector_metadata/generated/metadata_schema.json +++ b/airbyte_cdk/test/models/connector_metadata/generated/metadata_schema.json @@ -188,93 +188,62 @@ } }, "definitions": { - "TestConnections": { - "title": "TestConnections", - "description": "List of sandbox cloud connections that tests can be run against", - "type": "object", - "required": [ - "name", - "id" - ], - "additionalProperties": false, - "properties": { - "name": { - "type": "string", - "description": "The connection name" - }, - "id": { - "type": "string", - "description": "The connection ID" - } - } - }, - "SupportLevel": { - "title": "SupportLevel", + "ReleaseStage": { + "title": "ReleaseStage", "description": "enum that describes a connector's release stage", "type": "string", "enum": [ - "community", - "certified", - "archived" + "alpha", + "beta", + "generally_available", + "custom" ] }, - "SuggestedStreams": { - "title": "SuggestedStreams", - "description": "A source's suggested streams. These will be suggested by default for new connections using this source. Otherwise, all streams will be selected. This is useful for when your source has a lot of streams, but the average user will only want a subset of them synced.", - "type": "object", - "additionalProperties": true, - "properties": { - "streams": { - "type": "array", - "description": "An array of streams that this connector suggests the average user will want. SuggestedStreams not being present for the source means that all streams are suggested. 
An empty list here means that no streams are suggested.", - "items": { - "type": "string" - } - } - } - }, - "SourceFileInfo": { - "title": "SourceFileInfo", - "description": "Information about the source file that generated the registry entry", + "RolloutConfiguration": { + "title": "RolloutConfiguration", + "description": "configuration for the rollout of a connector", "type": "object", + "additionalProperties": false, "properties": { - "metadata_etag": { - "type": "string" - }, - "metadata_file_path": { - "type": "string" + "enableProgressiveRollout": { + "type": "boolean", + "default": false, + "description": "Whether to enable progressive rollout for the connector." }, - "metadata_bucket_name": { - "type": "string" + "initialPercentage": { + "type": "integer", + "minimum": 0, + "maximum": 100, + "default": 0, + "description": "The percentage of users that should receive the new version initially." }, - "metadata_last_modified": { - "type": "string" + "maxPercentage": { + "type": "integer", + "minimum": 0, + "maximum": 100, + "default": 50, + "description": "The percentage of users who should receive the release candidate during the test phase before full rollout." }, - "registry_entry_generated_at": { - "type": "string" + "advanceDelayMinutes": { + "type": "integer", + "minimum": 10, + "default": 10, + "description": "The number of minutes to wait before advancing the rollout percentage." } } }, - "SecretStore": { - "title": "SecretStore", - "description": "An object describing a secret store metadata", + "AllowedHosts": { + "title": "AllowedHosts", + "description": "A connector's allowed hosts. 
If present, the platform will limit communication to only hosts which are listed in `AllowedHosts.hosts`.", "type": "object", - "required": [ - "name", - "secretStore" - ], - "additionalProperties": false, + "additionalProperties": true, "properties": { - "alias": { - "type": "string", - "description": "The alias of the secret store which can map to its actual secret address" - }, - "type": { - "type": "string", - "description": "The type of the secret store", - "enum": [ - "GSM" - ] + "hosts": { + "type": "array", + "description": "An array of hosts that this connector can connect to. AllowedHosts not being present for the source or destination means that access to all hosts is allowed. An empty list here means that no network access is granted.", + "items": { + "type": "string" + } } } }, @@ -301,100 +270,83 @@ } } }, - "RolloutConfiguration": { - "title": "RolloutConfiguration", - "description": "configuration for the rollout of a connector", + "TestConnections": { + "title": "TestConnections", + "description": "List of sandbox cloud connections that tests can be run against", "type": "object", + "required": [ + "name", + "id" + ], "additionalProperties": false, "properties": { - "enableProgressiveRollout": { - "type": "boolean", - "default": false, - "description": "Whether to enable progressive rollout for the connector." - }, - "initialPercentage": { - "type": "integer", - "minimum": 0, - "maximum": 100, - "default": 0, - "description": "The percentage of users that should receive the new version initially." - }, - "maxPercentage": { - "type": "integer", - "minimum": 0, - "maximum": 100, - "default": 50, - "description": "The percentage of users who should receive the release candidate during the test phase before full rollout." 
+ "name": { + "type": "string", + "description": "The connection name" }, - "advanceDelayMinutes": { - "type": "integer", - "minimum": 10, - "default": 10, - "description": "The number of minutes to wait before advancing the rollout percentage." + "id": { + "type": "string", + "description": "The connection ID" } } }, - "ResourceRequirements": { - "title": "ResourceRequirements", - "description": "generic configuration for pod source requirements", + "AirbyteInternal": { + "title": "AirbyteInternal", + "description": "Fields for internal use only", "type": "object", - "additionalProperties": false, + "additionalProperties": true, "properties": { - "cpu_request": { - "type": "string" + "sl": { + "type": "integer", + "enum": [ + 0, + 100, + 200, + 300 + ] }, - "cpu_limit": { - "type": "string" + "ql": { + "type": "integer", + "enum": [ + 0, + 100, + 200, + 300, + 400, + 500, + 600 + ] }, - "memory_request": { - "type": "string" + "isEnterprise": { + "type": "boolean", + "default": false }, - "memory_limit": { - "type": "string" + "requireVersionIncrementsInPullRequests": { + "type": "boolean", + "default": true, + "description": "When false, version increment checks will be skipped for this connector" } } }, - "PyPi": { - "title": "PyPi", - "description": "describes the PyPi publishing options", + "ConnectorReleases": { + "title": "ConnectorReleases", + "description": "Contains information about different types of releases for a connector.", "type": "object", "additionalProperties": false, - "required": [ - "enabled", - "packageName" - ], "properties": { - "enabled": { - "type": "boolean" + "rolloutConfiguration": { + "$ref": "#/definitions/RolloutConfiguration" }, - "packageName": { + "breakingChanges": { + "$ref": "#/definitions/ConnectorBreakingChanges" + }, + "migrationDocumentationUrl": { + "description": "URL to documentation on how to migrate from the previous version to the current version. 
Defaults to ${documentationUrl}-migrations", "type": "string", - "description": "The name of the package on PyPi." - } - } - }, - "RemoteRegistries": { - "title": "RemoteRegistries", - "description": "describes how the connector is published to remote registries", - "type": "object", - "additionalProperties": false, - "properties": { - "pypi": { - "$ref": "#/definitions/PyPi" + "format": "uri" } } }, - "ReleaseStage": { - "title": "ReleaseStage", - "description": "enum that describes a connector's release stage", - "type": "string", - "enum": [ - "alpha", - "beta", - "generally_available", - "custom" - ] - }, "RegistryOverrides": { "title": "RegistryOverrides", "description": "describes the overrides per registry of a connector", @@ -447,173 +399,53 @@ } } }, - "NormalizationDestinationDefinitionConfig": { - "title": "NormalizationDestinationDefinitionConfig", - "description": "describes a normalization config for destination definition", + "ConnectorBuildOptions": { + "title": "ConnectorBuildOptions", + "description": "metadata specific to the build process.", "type": "object", - "required": [ - "normalizationRepository", - "normalizationTag", - "normalizationIntegrationType" - ], - "additionalProperties": true, + "additionalProperties": false, "properties": { - "normalizationRepository": { - "type": "string", - "description": "a field indicating the name of the repository to be used for normalization. If the value of the flag is NULL - normalization is not used." - }, - "normalizationTag": { - "type": "string", - "description": "a field indicating the tag of the docker repository to be used for normalization." - }, - "normalizationIntegrationType": { - "type": "string", - "description": "a field indicating the type of integration dialect to use for normalization." 
- } - } - }, - "JobType": { - "title": "JobType", - "description": "enum that describes the different types of jobs that the platform runs.", - "type": "string", - "enum": [ - "get_spec", - "check_connection", - "discover_schema", - "sync", - "reset_connection", - "connection_updater", - "replicate" - ] - }, - "GitInfo": { - "title": "GitInfo", - "description": "Information about the author of the last commit that modified this file. DO NOT DEFINE THIS FIELD MANUALLY. It will be overwritten by the CI.", - "type": "object", - "additionalProperties": false, - "properties": { - "commit_sha": { - "type": "string", - "description": "The git commit sha of the last commit that modified this file." - }, - "commit_timestamp": { - "type": "string", - "format": "date-time", - "description": "The git commit timestamp of the last commit that modified this file." - }, - "commit_author": { - "type": "string", - "description": "The git commit author of the last commit that modified this file." - }, - "commit_author_email": { - "type": "string", - "description": "The git commit author email of the last commit that modified this file." 
- } - } - }, - "GeneratedFields": { - "title": "GeneratedFields", - "description": "Optional schema for fields generated at metadata upload time", - "type": "object", - "properties": { - "git": { - "$ref": "#/definitions/GitInfo" - }, - "source_file_info": { - "$ref": "#/definitions/SourceFileInfo" - }, - "metrics": { - "$ref": "#/definitions/ConnectorMetrics" - }, - "sbomUrl": { - "type": "string", - "description": "URL to the SBOM file" + "baseImage": { + "type": "string" } } }, - "ConnectorTestSuiteOptions": { - "title": "ConnectorTestSuiteOptions", - "description": "Options for a specific connector test suite.", + "ResourceRequirements": { + "title": "ResourceRequirements", + "description": "generic configuration for pod source requirements", "type": "object", - "required": [ - "suite" - ], "additionalProperties": false, "properties": { - "suite": { - "description": "Name of the configured test suite", - "type": "string", - "enum": [ - "unitTests", - "integrationTests", - "acceptanceTests", - "liveTests" - ] - }, - "testSecrets": { - "description": "List of secrets required to run the test suite", - "type": "array", - "items": { - "$ref": "#/definitions/Secret" - } + "cpu_request": { + "type": "string" }, - "testConnections": { - "description": "List of sandbox cloud connections that tests can be run against", - "type": "array", - "items": { - "$ref": "#/definitions/TestConnections" - } - } - } - }, - "ConnectorReleases": { - "title": "ConnectorReleases", - "description": "Contains information about different types of releases for a connector.", - "type": "object", - "additionalProperties": false, - "properties": { - "rolloutConfiguration": { - "$ref": "#/definitions/RolloutConfiguration" + "cpu_limit": { + "type": "string" }, - "breakingChanges": { - "$ref": "#/definitions/ConnectorBreakingChanges" + "memory_request": { + "type": "string" }, - "migrationDocumentationUrl": { - "description": "URL to documentation on how to migrate from the previous version to 
the current version. Defaults to ${documentationUrl}-migrations", - "type": "string", - "format": "uri" + "memory_limit": { + "type": "string" } } }, - "ConnectorRegistryV0": { - "title": "ConnectorRegistryV0", - "description": "describes the collection of connectors retrieved from a registry", + "ConnectorPackageInfo": { + "title": "ConnectorPackageInfo", + "description": "Information about the contents of the connector image", "type": "object", - "required": [ - "destinations", - "sources" - ], "properties": { - "destinations": { - "type": "array", - "items": { - "$ref": "#/definitions/ConnectorRegistryDestinationDefinition" - } - }, - "sources": { - "type": "array", - "items": { - "$ref": "#/definitions/ConnectorRegistrySourceDefinition" - } + "cdk_version": { + "type": "string" } } }, - "ConnectorRegistrySourceDefinition": { - "title": "ConnectorRegistrySourceDefinition", - "description": "describes a source", + "ConnectorRegistryDestinationDefinition": { + "title": "ConnectorRegistryDestinationDefinition", + "description": "describes a destination", "type": "object", "required": [ - "sourceDefinitionId", + "destinationDefinitionId", "name", "dockerRepository", "dockerImageTag", @@ -622,7 +454,7 @@ ], "additionalProperties": true, "properties": { - "sourceDefinitionId": { + "destinationDefinitionId": { "type": "string", "format": "uuid" }, @@ -644,15 +476,6 @@ "iconUrl": { "type": "string" }, - "sourceType": { - "type": "string", - "enum": [ - "api", - "file", - "database", - "custom" - ] - }, "spec": { "type": "object" }, @@ -682,6 +505,13 @@ "type": "string", "format": "date" }, + "tags": { + "type": "array", + "description": "An array of tags that describe the connector. 
E.g: language:python, keyword:rds, etc.", + "items": { + "type": "string" + } + }, "resourceRequirements": { "$ref": "#/definitions/ActorDefinitionResourceRequirements" }, @@ -689,19 +519,15 @@ "type": "string", "description": "the Airbyte Protocol version supported by the connector" }, - "allowedHosts": { - "$ref": "#/definitions/AllowedHosts" - }, - "suggestedStreams": { - "$ref": "#/definitions/SuggestedStreams" + "normalizationConfig": { + "$ref": "#/definitions/NormalizationDestinationDefinitionConfig" }, - "maxSecondsBetweenMessages": { - "description": "Number of seconds allowed between 2 airbyte protocol messages. The source will timeout if this delay is reach", - "type": "integer" + "supportsDbt": { + "type": "boolean", + "description": "an optional flag indicating whether DBT is used in the normalization. If the flag value is NULL - DBT is not used." }, - "erdUrl": { - "type": "string", - "description": "The URL where you can visualize the ERD" + "allowedHosts": { + "$ref": "#/definitions/AllowedHosts" }, "releases": { "$ref": "#/definitions/ConnectorRegistryReleases" @@ -709,15 +535,9 @@ "ab_internal": { "$ref": "#/definitions/AirbyteInternal" }, - "generated": { - "$ref": "#/definitions/GeneratedFields" - }, - "packageInfo": { - "$ref": "#/definitions/ConnectorPackageInfo" - }, - "language": { - "type": "string", - "description": "The language the connector is written in" + "supportsRefreshes": { + "type": "boolean", + "default": false }, "supportsFileTransfer": { "type": "boolean", @@ -726,186 +546,20 @@ "supportsDataActivation": { "type": "boolean", "default": false - } - } - }, - "ConnectorReleaseCandidates": { - "description": "Each entry denotes a release candidate version of a connector.", - "type": "object", - "additionalProperties": false, - "minProperties": 1, - "maxProperties": 1, - "patternProperties": { - "^\\d+\\.\\d+\\.\\d+(-[0-9A-Za-z-.]+)?$": { - "$ref": "#/definitions/VersionReleaseCandidate" - } - } - }, - "VersionReleaseCandidate": { 
- "description": "Contains information about a release candidate version of a connector.", - "additionalProperties": false, - "type": "object", - "oneOf": [ - { - "$ref": "#/definitions/ConnectorRegistrySourceDefinition" }, - { - "$ref": "#/definitions/ConnectorRegistryDestinationDefinition" - } - ] - }, - "ConnectorRegistryReleases": { - "title": "ConnectorRegistryReleases", - "description": "Contains information about different types of releases for a connector.", - "type": "object", - "additionalProperties": false, - "properties": { - "releaseCandidates": { - "$ref": "#/definitions/ConnectorReleaseCandidates" - }, - "rolloutConfiguration": { - "$ref": "#/definitions/RolloutConfiguration" + "generated": { + "$ref": "#/definitions/GeneratedFields" }, - "breakingChanges": { - "$ref": "#/definitions/ConnectorBreakingChanges" + "packageInfo": { + "$ref": "#/definitions/ConnectorPackageInfo" }, - "migrationDocumentationUrl": { - "description": "URL to documentation on how to migrate from the previous version to the current version. Defaults to ${documentationUrl}-migrations", + "language": { "type": "string", - "format": "uri" + "description": "The language the connector is written in" } } }, - "ConnectorRegistryDestinationDefinition": { - "title": "ConnectorRegistryDestinationDefinition", - "description": "describes a destination", - "type": "object", - "required": [ - "destinationDefinitionId", - "name", - "dockerRepository", - "dockerImageTag", - "documentationUrl", - "spec" - ], - "additionalProperties": true, - "properties": { - "destinationDefinitionId": { - "type": "string", - "format": "uuid" - }, - "name": { - "type": "string" - }, - "dockerRepository": { - "type": "string" - }, - "dockerImageTag": { - "type": "string" - }, - "documentationUrl": { - "type": "string" - }, - "icon": { - "type": "string" - }, - "iconUrl": { - "type": "string" - }, - "spec": { - "type": "object" - }, - "tombstone": { - "description": "if false, the configuration is active. 
if true, then this configuration is permanently off.", - "type": "boolean", - "default": false - }, - "public": { - "description": "true if this connector definition is available to all workspaces", - "type": "boolean", - "default": false - }, - "custom": { - "description": "whether this is a custom connector definition", - "type": "boolean", - "default": false - }, - "releaseStage": { - "$ref": "#/definitions/ReleaseStage" - }, - "supportLevel": { - "$ref": "#/definitions/SupportLevel" - }, - "releaseDate": { - "description": "The date when this connector was first released, in yyyy-mm-dd format.", - "type": "string", - "format": "date" - }, - "tags": { - "type": "array", - "description": "An array of tags that describe the connector. E.g: language:python, keyword:rds, etc.", - "items": { - "type": "string" - } - }, - "resourceRequirements": { - "$ref": "#/definitions/ActorDefinitionResourceRequirements" - }, - "protocolVersion": { - "type": "string", - "description": "the Airbyte Protocol version supported by the connector" - }, - "normalizationConfig": { - "$ref": "#/definitions/NormalizationDestinationDefinitionConfig" - }, - "supportsDbt": { - "type": "boolean", - "description": "an optional flag indicating whether DBT is used in the normalization. If the flag value is NULL - DBT is not used." 
- }, - "allowedHosts": { - "$ref": "#/definitions/AllowedHosts" - }, - "releases": { - "$ref": "#/definitions/ConnectorRegistryReleases" - }, - "ab_internal": { - "$ref": "#/definitions/AirbyteInternal" - }, - "supportsRefreshes": { - "type": "boolean", - "default": false - }, - "supportsFileTransfer": { - "type": "boolean", - "default": false - }, - "supportsDataActivation": { - "type": "boolean", - "default": false - }, - "generated": { - "$ref": "#/definitions/GeneratedFields" - }, - "packageInfo": { - "$ref": "#/definitions/ConnectorPackageInfo" - }, - "language": { - "type": "string", - "description": "The language the connector is written in" - } - } - }, - "ConnectorPackageInfo": { - "title": "ConnectorPackageInfo", - "description": "Information about the contents of the connector image", - "type": "object", - "properties": { - "cdk_version": { - "type": "string" - } - } - }, - "ConnectorMetric": { + "ConnectorMetric": { "type": "object", "properties": { "usage": { @@ -960,99 +614,278 @@ } } }, - "ConnectorIPCOptions": { - "title": "ConnectorIPCOptions", + "NormalizationDestinationDefinitionConfig": { + "title": "NormalizationDestinationDefinitionConfig", + "description": "describes a normalization config for destination definition", "type": "object", "required": [ - "dataChannel" + "normalizationRepository", + "normalizationTag", + "normalizationIntegrationType" ], - "additionalProperties": false, + "additionalProperties": true, "properties": { - "dataChannel": { - "type": "object", - "required": [ - "version", - "supportedSerialization", - "supportedTransport" - ], - "additionalProperties": false, - "properties": { - "version": { - "type": "string" - }, - "supportedSerialization": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "JSONL", - "PROTOBUF", - "FLATBUFFERS" - ] - } - }, - "supportedTransport": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "STDIO", - "SOCKET" - ] - } - } - } + "normalizationRepository": { + 
"type": "string", + "description": "a field indicating the name of the repository to be used for normalization. If the value of the flag is NULL - normalization is not used." + }, + "normalizationTag": { + "type": "string", + "description": "a field indicating the tag of the docker repository to be used for normalization." + }, + "normalizationIntegrationType": { + "type": "string", + "description": "a field indicating the type of integration dialect to use for normalization." } } }, - "ConnectorBuildOptions": { - "title": "ConnectorBuildOptions", - "description": "metadata specific to the build process.", + "SuggestedStreams": { + "title": "SuggestedStreams", + "description": "A source's suggested streams. These will be suggested by default for new connections using this source. Otherwise, all streams will be selected. This is useful for when your source has a lot of streams, but the average user will only want a subset of them synced.", "type": "object", - "additionalProperties": false, + "additionalProperties": true, "properties": { - "baseImage": { - "type": "string" + "streams": { + "type": "array", + "description": "An array of streams that this connector suggests the average user will want. SuggestedStreams not being present for the source means that all streams are suggested. 
An empty list here means that no streams are suggested.", + "items": { + "type": "string" + } } } }, - "VersionBreakingChange": { - "description": "Contains information about a breaking change, including the deadline to upgrade and a message detailing the change.", + "GeneratedFields": { + "title": "GeneratedFields", + "description": "Optional schema for fields generated at metadata upload time", "type": "object", - "additionalProperties": false, - "required": [ - "upgradeDeadline", - "message" - ], "properties": { - "upgradeDeadline": { - "description": "The deadline by which to upgrade before the breaking change takes effect.", - "type": "string", - "format": "date" + "git": { + "$ref": "#/definitions/GitInfo" }, - "message": { - "description": "Descriptive message detailing the breaking change.", - "type": "string" + "source_file_info": { + "$ref": "#/definitions/SourceFileInfo" }, - "deadlineAction": { - "description": "Action to do when the deadline is reached.", - "type": "string", - "enum": [ - "auto_upgrade", - "disable" - ] + "metrics": { + "$ref": "#/definitions/ConnectorMetrics" }, - "migrationDocumentationUrl": { - "description": "URL to documentation on how to migrate to the current version. Defaults to ${documentationUrl}-migrations#${version}", + "sbomUrl": { "type": "string", - "format": "uri" + "description": "URL to the SBOM file" + } + } + }, + "ConnectorRegistryV0": { + "title": "ConnectorRegistryV0", + "description": "describes the collection of connectors retrieved from a registry", + "type": "object", + "required": [ + "destinations", + "sources" + ], + "properties": { + "destinations": { + "type": "array", + "items": { + "$ref": "#/definitions/ConnectorRegistryDestinationDefinition" + } }, - "scopedImpact": { - "description": "List of scopes that are impacted by the breaking change. 
If not specified, the breaking change cannot be scoped to reduce impact via the supported scope types.", + "sources": { "type": "array", - "minItems": 1, "items": { - "$ref": "#/definitions/BreakingChangeScope" + "$ref": "#/definitions/ConnectorRegistrySourceDefinition" + } + } + } + }, + "GitInfo": { + "title": "GitInfo", + "description": "Information about the author of the last commit that modified this file. DO NOT DEFINE THIS FIELD MANUALLY. It will be overwritten by the CI.", + "type": "object", + "additionalProperties": false, + "properties": { + "commit_sha": { + "type": "string", + "description": "The git commit sha of the last commit that modified this file." + }, + "commit_timestamp": { + "type": "string", + "format": "date-time", + "description": "The git commit timestamp of the last commit that modified this file." + }, + "commit_author": { + "type": "string", + "description": "The git commit author of the last commit that modified this file." + }, + "commit_author_email": { + "type": "string", + "description": "The git commit author email of the last commit that modified this file." 
+ } + } + }, + "ConnectorReleaseCandidates": { + "description": "Each entry denotes a release candidate version of a connector.", + "type": "object", + "additionalProperties": false, + "minProperties": 1, + "maxProperties": 1, + "patternProperties": { + "^\\d+\\.\\d+\\.\\d+(-[0-9A-Za-z-.]+)?$": { + "$ref": "#/definitions/VersionReleaseCandidate" + } + } + }, + "VersionReleaseCandidate": { + "description": "Contains information about a release candidate version of a connector.", + "additionalProperties": false, + "type": "object", + "oneOf": [ + { + "$ref": "#/definitions/ConnectorRegistrySourceDefinition" + }, + { + "$ref": "#/definitions/ConnectorRegistryDestinationDefinition" + } + ] + }, + "ConnectorRegistryReleases": { + "title": "ConnectorRegistryReleases", + "description": "Contains information about different types of releases for a connector.", + "type": "object", + "additionalProperties": false, + "properties": { + "releaseCandidates": { + "$ref": "#/definitions/ConnectorReleaseCandidates" + }, + "rolloutConfiguration": { + "$ref": "#/definitions/RolloutConfiguration" + }, + "breakingChanges": { + "$ref": "#/definitions/ConnectorBreakingChanges" + }, + "migrationDocumentationUrl": { + "description": "URL to documentation on how to migrate from the previous version to the current version. Defaults to ${documentationUrl}-migrations", + "type": "string", + "format": "uri" + } + } + }, + "JobTypeResourceLimit": { + "description": "sets resource requirements for a specific job type for an actor definition. 
these values override the default, if both are set.", + "type": "object", + "additionalProperties": false, + "required": [ + "jobType", + "resourceRequirements" + ], + "properties": { + "jobType": { + "$ref": "#/definitions/JobType" + }, + "resourceRequirements": { + "$ref": "#/definitions/ResourceRequirements" + } + } + }, + "ActorDefinitionResourceRequirements": { + "title": "ActorDefinitionResourceRequirements", + "description": "actor definition specific resource requirements", + "type": "object", + "additionalProperties": false, + "properties": { + "default": { + "description": "if set, these are the requirements that should be set for ALL jobs run for this actor definition.", + "$ref": "#/definitions/ResourceRequirements" + }, + "jobSpecific": { + "type": "array", + "items": { + "$ref": "#/definitions/JobTypeResourceLimit" + } + } + } + }, + "ConnectorIPCOptions": { + "title": "ConnectorIPCOptions", + "type": "object", + "required": [ + "dataChannel" + ], + "additionalProperties": false, + "properties": { + "dataChannel": { + "type": "object", + "required": [ + "version", + "supportedSerialization", + "supportedTransport" + ], + "additionalProperties": false, + "properties": { + "version": { + "type": "string" + }, + "supportedSerialization": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "JSONL", + "PROTOBUF", + "FLATBUFFERS" + ] + } + }, + "supportedTransport": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "STDIO", + "SOCKET" + ] + } + } + } + } + } + }, + "VersionBreakingChange": { + "description": "Contains information about a breaking change, including the deadline to upgrade and a message detailing the change.", + "type": "object", + "additionalProperties": false, + "required": [ + "upgradeDeadline", + "message" + ], + "properties": { + "upgradeDeadline": { + "description": "The deadline by which to upgrade before the breaking change takes effect.", + "type": "string", + "format": "date" + }, + "message": { + 
"description": "Descriptive message detailing the breaking change.", + "type": "string" + }, + "deadlineAction": { + "description": "Action to do when the deadline is reached.", + "type": "string", + "enum": [ + "auto_upgrade", + "disable" + ] + }, + "migrationDocumentationUrl": { + "description": "URL to documentation on how to migrate to the current version. Defaults to ${documentationUrl}-migrations#${version}", + "type": "string", + "format": "uri" + }, + "scopedImpact": { + "description": "List of scopes that are impacted by the breaking change. If not specified, the breaking change cannot be scoped to reduce impact via the supported scope types.", + "type": "array", + "minItems": 1, + "items": { + "$ref": "#/definitions/BreakingChangeScope" } } } @@ -1076,7 +909,7 @@ ], "properties": { "scopeType": { - "type": "const", + "type": "string", "const": "stream" }, "impactedScopes": { @@ -1101,91 +934,254 @@ } } }, - "AllowedHosts": { - "title": "AllowedHosts", - "description": "A connector's allowed hosts. If present, the platform will limit communication to only hosts which are listed in `AllowedHosts.hosts`.", - "type": "object", - "additionalProperties": true, - "properties": { - "hosts": { - "type": "array", - "description": "An array of hosts that this connector can connect to. AllowedHosts not being present for the source or destination means that access to all hosts is allowed. 
An empty list here means that no network access is granted.", - "items": { - "type": "string" - } - } - } + "JobType": { + "title": "JobType", + "description": "enum that describes the different types of jobs that the platform runs.", + "type": "string", + "enum": [ + "get_spec", + "check_connection", + "discover_schema", + "sync", + "reset_connection", + "connection_updater", + "replicate" + ] }, - "AirbyteInternal": { - "title": "AirbyteInternal", - "description": "Fields for internal use only", + "ConnectorRegistrySourceDefinition": { + "title": "ConnectorRegistrySourceDefinition", + "description": "describes a source", "type": "object", + "required": [ + "sourceDefinitionId", + "name", + "dockerRepository", + "dockerImageTag", + "documentationUrl", + "spec" + ], "additionalProperties": true, "properties": { - "sl": { - "type": "integer", - "enum": [ - 0, - 100, - 200, - 300 - ] + "sourceDefinitionId": { + "type": "string", + "format": "uuid" }, - "ql": { - "type": "integer", + "name": { + "type": "string" + }, + "dockerRepository": { + "type": "string" + }, + "dockerImageTag": { + "type": "string" + }, + "documentationUrl": { + "type": "string" + }, + "icon": { + "type": "string" + }, + "iconUrl": { + "type": "string" + }, + "sourceType": { + "type": "string", "enum": [ - 0, - 100, - 200, - 300, - 400, - 500, - 600 + "api", + "file", + "database", + "custom" ] }, - "isEnterprise": { + "spec": { + "type": "object" + }, + "tombstone": { + "description": "if false, the configuration is active. 
if true, then this configuration is permanently off.", "type": "boolean", "default": false }, - "requireVersionIncrementsInPullRequests": { + "public": { + "description": "true if this connector definition is available to all workspaces", "type": "boolean", - "default": true, - "description": "When false, version increment checks will be skipped for this connector" + "default": false + }, + "custom": { + "description": "whether this is a custom connector definition", + "type": "boolean", + "default": false + }, + "releaseStage": { + "$ref": "#/definitions/ReleaseStage" + }, + "supportLevel": { + "$ref": "#/definitions/SupportLevel" + }, + "releaseDate": { + "description": "The date when this connector was first released, in yyyy-mm-dd format.", + "type": "string", + "format": "date" + }, + "resourceRequirements": { + "$ref": "#/definitions/ActorDefinitionResourceRequirements" + }, + "protocolVersion": { + "type": "string", + "description": "the Airbyte Protocol version supported by the connector" + }, + "allowedHosts": { + "$ref": "#/definitions/AllowedHosts" + }, + "suggestedStreams": { + "$ref": "#/definitions/SuggestedStreams" + }, + "maxSecondsBetweenMessages": { + "description": "Number of seconds allowed between 2 airbyte protocol messages. 
The source will timeout if this delay is reach", + "type": "integer" + }, + "erdUrl": { + "type": "string", + "description": "The URL where you can visualize the ERD" + }, + "releases": { + "$ref": "#/definitions/ConnectorRegistryReleases" + }, + "ab_internal": { + "$ref": "#/definitions/AirbyteInternal" + }, + "generated": { + "$ref": "#/definitions/GeneratedFields" + }, + "packageInfo": { + "$ref": "#/definitions/ConnectorPackageInfo" + }, + "language": { + "type": "string", + "description": "The language the connector is written in" + }, + "supportsFileTransfer": { + "type": "boolean", + "default": false + }, + "supportsDataActivation": { + "type": "boolean", + "default": false } } }, - "JobTypeResourceLimit": { - "description": "sets resource requirements for a specific job type for an actor definition. these values override the default, if both are set.", + "SupportLevel": { + "title": "SupportLevel", + "description": "enum that describes a connector's release stage", + "type": "string", + "enum": [ + "community", + "certified", + "archived" + ] + }, + "SecretStore": { + "title": "SecretStore", + "description": "An object describing a secret store metadata", + "type": "object", + "additionalProperties": false, + "properties": { + "alias": { + "type": "string", + "description": "The alias of the secret store which can map to its actual secret address" + }, + "type": { + "type": "string", + "description": "The type of the secret store", + "enum": [ + "GSM" + ] + } + } + }, + "PyPi": { + "title": "PyPi", + "description": "describes the PyPi publishing options", "type": "object", "additionalProperties": false, "required": [ - "jobType", - "resourceRequirements" + "enabled", + "packageName" ], "properties": { - "jobType": { - "$ref": "#/definitions/JobType" + "enabled": { + "type": "boolean" }, - "resourceRequirements": { - "$ref": "#/definitions/ResourceRequirements" + "packageName": { + "type": "string", + "description": "The name of the package on PyPi." 
} } }, - "ActorDefinitionResourceRequirements": { - "title": "ActorDefinitionResourceRequirements", - "description": "actor definition specific resource requirements", + "RemoteRegistries": { + "title": "RemoteRegistries", + "description": "describes how the connector is published to remote registries", "type": "object", "additionalProperties": false, "properties": { - "default": { - "description": "if set, these are the requirements that should be set for ALL jobs run for this actor definition.", - "$ref": "#/definitions/ResourceRequirements" + "pypi": { + "$ref": "#/definitions/PyPi" + } + } + }, + "ConnectorTestSuiteOptions": { + "title": "ConnectorTestSuiteOptions", + "description": "Options for a specific connector test suite.", + "type": "object", + "required": [ + "suite" + ], + "additionalProperties": false, + "properties": { + "suite": { + "description": "Name of the configured test suite", + "type": "string", + "enum": [ + "unitTests", + "integrationTests", + "acceptanceTests", + "liveTests" + ] }, - "jobSpecific": { + "testSecrets": { + "description": "List of secrets required to run the test suite", "type": "array", "items": { - "$ref": "#/definitions/JobTypeResourceLimit" + "$ref": "#/definitions/Secret" } + }, + "testConnections": { + "description": "List of sandbox cloud connections that tests can be run against", + "type": "array", + "items": { + "$ref": "#/definitions/TestConnections" + } + } + } + }, + "SourceFileInfo": { + "title": "SourceFileInfo", + "description": "Information about the source file that generated the registry entry", + "type": "object", + "properties": { + "metadata_etag": { + "type": "string" + }, + "metadata_file_path": { + "type": "string" + }, + "metadata_bucket_name": { + "type": "string" + }, + "metadata_last_modified": { + "type": "string" + }, + "registry_entry_generated_at": { + "type": "string" } } } diff --git a/airbyte_cdk/test/models/connector_metadata/generated/models.py 
b/airbyte_cdk/test/models/connector_metadata/generated/models.py index 51227677c..3b95f4c4f 100644 --- a/airbyte_cdk/test/models/connector_metadata/generated/models.py +++ b/airbyte_cdk/test/models/connector_metadata/generated/models.py @@ -27,82 +27,87 @@ class ConnectorSubtype(Enum): vectorstore = "vectorstore" -class TestConnections(BaseModel): - class Config: - extra = Extra.forbid +class ReleaseStage(Enum): + alpha = "alpha" + beta = "beta" + generally_available = "generally_available" + custom = "custom" - name: str = Field(..., description="The connection name") - id: str = Field(..., description="The connection ID") +class RolloutConfiguration(BaseModel): + class Config: + extra = Extra.forbid -class SupportLevel(Enum): - community = "community" - certified = "certified" - archived = "archived" + enableProgressiveRollout: Optional[bool] = Field( + False, description="Whether to enable progressive rollout for the connector." + ) + initialPercentage: Optional[conint(ge=0, le=100)] = Field( + 0, + description="The percentage of users that should receive the new version initially.", + ) + maxPercentage: Optional[conint(ge=0, le=100)] = Field( + 50, + description="The percentage of users who should receive the release candidate during the test phase before full rollout.", + ) + advanceDelayMinutes: Optional[conint(ge=10)] = Field( + 10, + description="The number of minutes to wait before advancing the rollout percentage.", + ) -class SuggestedStreams(BaseModel): +class AllowedHosts(BaseModel): class Config: extra = Extra.allow - streams: Optional[List[str]] = Field( + hosts: Optional[List[str]] = Field( None, - description="An array of streams that this connector suggests the average user will want. SuggestedStreams not being present for the source means that all streams are suggested. An empty list here means that no streams are suggested.", + description="An array of hosts that this connector can connect to. 
AllowedHosts not being present for the source or destination means that access to all hosts is allowed. An empty list here means that no network access is granted.", ) -class SourceFileInfo(BaseModel): - metadata_etag: Optional[str] = None - metadata_file_path: Optional[str] = None - metadata_bucket_name: Optional[str] = None - metadata_last_modified: Optional[str] = None - registry_entry_generated_at: Optional[str] = None - - -class SecretStore(BaseModel): +class TestConnections(BaseModel): class Config: extra = Extra.forbid - alias: Optional[str] = Field( - None, - description="The alias of the secret store which can map to its actual secret address", - ) - type: Optional[Literal["GSM"]] = Field( - None, description="The type of the secret store" - ) + name: str = Field(..., description="The connection name") + id: str = Field(..., description="The connection ID") -class Secret(BaseModel): +class Sl(Enum): + integer_0 = 0 + integer_100 = 100 + integer_200 = 200 + integer_300 = 300 + + +class Ql(Enum): + integer_0 = 0 + integer_100 = 100 + integer_200 = 200 + integer_300 = 300 + integer_400 = 400 + integer_500 = 500 + integer_600 = 600 + + +class AirbyteInternal(BaseModel): class Config: - extra = Extra.forbid + extra = Extra.allow - name: str = Field(..., description="The secret name in the secret store") - fileName: Optional[str] = Field( - None, - description="The name of the file to which the secret value would be persisted", + sl: Optional[Sl] = None + ql: Optional[Ql] = None + isEnterprise: Optional[bool] = False + requireVersionIncrementsInPullRequests: Optional[bool] = Field( + True, + description="When false, version increment checks will be skipped for this connector", ) - secretStore: SecretStore -class RolloutConfiguration(BaseModel): +class ConnectorBuildOptions(BaseModel): class Config: extra = Extra.forbid - enableProgressiveRollout: Optional[bool] = Field( - False, description="Whether to enable progressive rollout for the connector." 
- ) - initialPercentage: Optional[conint(ge=0, le=100)] = Field( - 0, - description="The percentage of users that should receive the new version initially.", - ) - maxPercentage: Optional[conint(ge=0, le=100)] = Field( - 50, - description="The percentage of users who should receive the release candidate during the test phase before full rollout.", - ) - advanceDelayMinutes: Optional[conint(ge=10)] = Field( - 10, - description="The number of minutes to wait before advancing the rollout percentage.", - ) + baseImage: Optional[str] = None class ResourceRequirements(BaseModel): @@ -115,26 +120,35 @@ class Config: memory_limit: Optional[str] = None -class PyPi(BaseModel): - class Config: - extra = Extra.forbid +class ConnectorPackageInfo(BaseModel): + cdk_version: Optional[str] = None - enabled: bool - packageName: str = Field(..., description="The name of the package on PyPi.") + +class Usage(Enum): + low = "low" + medium = "medium" + high = "high" -class RemoteRegistries(BaseModel): +class SyncSuccessRate(Enum): + low = "low" + medium = "medium" + high = "high" + + +class ConnectorMetric(BaseModel): class Config: - extra = Extra.forbid + extra = Extra.allow - pypi: Optional[PyPi] = None + usage: Optional[Union[str, Usage]] = None + sync_success_rate: Optional[Union[str, SyncSuccessRate]] = None + connector_version: Optional[str] = None -class ReleaseStage(Enum): - alpha = "alpha" - beta = "beta" - generally_available = "generally_available" - custom = "custom" +class ConnectorMetrics(BaseModel): + all: Optional[ConnectorMetric] = None + cloud: Optional[ConnectorMetric] = None + oss: Optional[ConnectorMetric] = None class NormalizationDestinationDefinitionConfig(BaseModel): @@ -155,14 +169,14 @@ class Config: ) -class JobType(Enum): - get_spec = "get_spec" - check_connection = "check_connection" - discover_schema = "discover_schema" - sync = "sync" - reset_connection = "reset_connection" - connection_updater = "connection_updater" - replicate = "replicate" +class 
SuggestedStreams(BaseModel): + class Config: + extra = Extra.allow + + streams: Optional[List[str]] = Field( + None, + description="An array of streams that this connector suggests the average user will want. SuggestedStreams not being present for the source means that all streams are suggested. An empty list here means that no streams are suggested.", + ) class GitInfo(BaseModel): @@ -187,65 +201,6 @@ class Config: ) -class Suite(Enum): - unitTests = "unitTests" - integrationTests = "integrationTests" - acceptanceTests = "acceptanceTests" - liveTests = "liveTests" - - -class ConnectorTestSuiteOptions(BaseModel): - class Config: - extra = Extra.forbid - - suite: Suite = Field(..., description="Name of the configured test suite") - testSecrets: Optional[List[Secret]] = Field( - None, description="List of secrets required to run the test suite" - ) - testConnections: Optional[List[TestConnections]] = Field( - None, - description="List of sandbox cloud connections that tests can be run against", - ) - - -class SourceType(Enum): - api = "api" - file = "file" - database = "database" - custom = "custom" - - -class ConnectorPackageInfo(BaseModel): - cdk_version: Optional[str] = None - - -class Usage(Enum): - low = "low" - medium = "medium" - high = "high" - - -class SyncSuccessRate(Enum): - low = "low" - medium = "medium" - high = "high" - - -class ConnectorMetric(BaseModel): - class Config: - extra = Extra.allow - - usage: Optional[Union[str, Usage]] = None - sync_success_rate: Optional[Union[str, SyncSuccessRate]] = None - connector_version: Optional[str] = None - - -class ConnectorMetrics(BaseModel): - all: Optional[ConnectorMetric] = None - cloud: Optional[ConnectorMetric] = None - oss: Optional[ConnectorMetric] = None - - class SupportedSerializationEnum(Enum): JSONL = "JSONL" PROTOBUF = "PROTOBUF" @@ -273,13 +228,6 @@ class Config: dataChannel: DataChannel -class ConnectorBuildOptions(BaseModel): - class Config: - extra = Extra.forbid - - baseImage: Optional[str] = 
None - - class DeadlineAction(Enum): auto_upgrade = "auto_upgrade" disable = "disable" @@ -289,7 +237,7 @@ class StreamBreakingChangeScope(BaseModel): class Config: extra = Extra.forbid - scopeType: Any = Field("stream", const=True) + scopeType: str = Field("stream", const=True) impactedScopes: List[str] = Field( ..., description="List of streams that are impacted by the breaking change.", @@ -297,44 +245,89 @@ class Config: ) -class AllowedHosts(BaseModel): +class JobType(Enum): + get_spec = "get_spec" + check_connection = "check_connection" + discover_schema = "discover_schema" + sync = "sync" + reset_connection = "reset_connection" + connection_updater = "connection_updater" + replicate = "replicate" + + +class SourceType(Enum): + api = "api" + file = "file" + database = "database" + custom = "custom" + + +class SupportLevel(Enum): + community = "community" + certified = "certified" + archived = "archived" + + +class SecretStore(BaseModel): class Config: - extra = Extra.allow + extra = Extra.forbid - hosts: Optional[List[str]] = Field( + alias: Optional[str] = Field( None, - description="An array of hosts that this connector can connect to. AllowedHosts not being present for the source or destination means that access to all hosts is allowed. 
An empty list here means that no network access is granted.", + description="The alias of the secret store which can map to its actual secret address", + ) + type: Optional[Literal["GSM"]] = Field( + None, description="The type of the secret store" ) -class Sl(Enum): - integer_0 = 0 - integer_100 = 100 - integer_200 = 200 - integer_300 = 300 +class PyPi(BaseModel): + class Config: + extra = Extra.forbid + enabled: bool + packageName: str = Field(..., description="The name of the package on PyPi.") -class Ql(Enum): - integer_0 = 0 - integer_100 = 100 - integer_200 = 200 - integer_300 = 300 - integer_400 = 400 - integer_500 = 500 - integer_600 = 600 +class RemoteRegistries(BaseModel): + class Config: + extra = Extra.forbid -class AirbyteInternal(BaseModel): + pypi: Optional[PyPi] = None + + +class Suite(Enum): + unitTests = "unitTests" + integrationTests = "integrationTests" + acceptanceTests = "acceptanceTests" + liveTests = "liveTests" + + +class SourceFileInfo(BaseModel): + metadata_etag: Optional[str] = None + metadata_file_path: Optional[str] = None + metadata_bucket_name: Optional[str] = None + metadata_last_modified: Optional[str] = None + registry_entry_generated_at: Optional[str] = None + + +class Secret(BaseModel): class Config: - extra = Extra.allow + extra = Extra.forbid - sl: Optional[Sl] = None - ql: Optional[Ql] = None - isEnterprise: Optional[bool] = False - requireVersionIncrementsInPullRequests: Optional[bool] = Field( - True, - description="When false, version increment checks will be skipped for this connector", + name: str = Field(..., description="The secret name in the secret store") + fileName: Optional[str] = Field( + None, + description="The name of the file to which the secret value would be persisted", ) + secretStore: SecretStore + + +class GeneratedFields(BaseModel): + git: Optional[GitInfo] = None + source_file_info: Optional[SourceFileInfo] = None + metrics: Optional[ConnectorMetrics] = None + sbomUrl: Optional[str] = Field(None, 
description="URL to the SBOM file") class JobTypeResourceLimit(BaseModel): @@ -356,6 +349,27 @@ class Config: jobSpecific: Optional[List[JobTypeResourceLimit]] = None +class BreakingChangeScope(BaseModel): + __root__: StreamBreakingChangeScope = Field( + ..., + description="A scope that can be used to limit the impact of a breaking change.", + ) + + +class ConnectorTestSuiteOptions(BaseModel): + class Config: + extra = Extra.forbid + + suite: Suite = Field(..., description="Name of the configured test suite") + testSecrets: Optional[List[Secret]] = Field( + None, description="List of secrets required to run the test suite" + ) + testConnections: Optional[List[TestConnections]] = Field( + None, + description="List of sandbox cloud connections that tests can be run against", + ) + + class RegistryOverrides(BaseModel): class Config: extra = Extra.forbid @@ -375,28 +389,6 @@ class Config: resourceRequirements: Optional[ActorDefinitionResourceRequirements] = None -class GeneratedFields(BaseModel): - git: Optional[GitInfo] = None - source_file_info: Optional[SourceFileInfo] = None - metrics: Optional[ConnectorMetrics] = None - sbomUrl: Optional[str] = Field(None, description="URL to the SBOM file") - - -class BreakingChangeScope(BaseModel): - __root__: StreamBreakingChangeScope = Field( - ..., - description="A scope that can be used to limit the impact of a breaking change.", - ) - - -class RegistryOverridesModel(BaseModel): - class Config: - extra = Extra.forbid - - oss: Optional[RegistryOverrides] = None - cloud: Optional[RegistryOverrides] = None - - class VersionBreakingChange(BaseModel): class Config: extra = Extra.forbid @@ -433,6 +425,14 @@ class Config: ) +class RegistryOverridesModel(BaseModel): + class Config: + extra = Extra.forbid + + oss: Optional[RegistryOverrides] = None + cloud: Optional[RegistryOverrides] = None + + class ConnectorReleases(BaseModel): class Config: extra = Extra.forbid @@ -506,23 +506,17 @@ class Config: data: Data -class 
ConnectorRegistryV0(BaseModel): - destinations: List[ConnectorRegistryDestinationDefinition] - sources: List[ConnectorRegistrySourceDefinition] - - -class ConnectorRegistrySourceDefinition(BaseModel): +class ConnectorRegistryDestinationDefinition(BaseModel): class Config: extra = Extra.allow - sourceDefinitionId: UUID + destinationDefinitionId: UUID name: str dockerRepository: str dockerImageTag: str documentationUrl: str icon: Optional[str] = None iconUrl: Optional[str] = None - sourceType: Optional[SourceType] = None spec: Dict[str, Any] tombstone: Optional[bool] = Field( False, @@ -541,28 +535,35 @@ class Config: None, description="The date when this connector was first released, in yyyy-mm-dd format.", ) + tags: Optional[List[str]] = Field( + None, + description="An array of tags that describe the connector. E.g: language:python, keyword:rds, etc.", + ) resourceRequirements: Optional[ActorDefinitionResourceRequirements] = None protocolVersion: Optional[str] = Field( None, description="the Airbyte Protocol version supported by the connector" ) - allowedHosts: Optional[AllowedHosts] = None - suggestedStreams: Optional[SuggestedStreams] = None - maxSecondsBetweenMessages: Optional[int] = Field( + normalizationConfig: Optional[NormalizationDestinationDefinitionConfig] = None + supportsDbt: Optional[bool] = Field( None, - description="Number of seconds allowed between 2 airbyte protocol messages. The source will timeout if this delay is reach", - ) - erdUrl: Optional[str] = Field( - None, description="The URL where you can visualize the ERD" + description="an optional flag indicating whether DBT is used in the normalization. 
If the flag value is NULL - DBT is not used.", ) + allowedHosts: Optional[AllowedHosts] = None releases: Optional[ConnectorRegistryReleases] = None ab_internal: Optional[AirbyteInternal] = None + supportsRefreshes: Optional[bool] = False + supportsFileTransfer: Optional[bool] = False + supportsDataActivation: Optional[bool] = False generated: Optional[GeneratedFields] = None packageInfo: Optional[ConnectorPackageInfo] = None language: Optional[str] = Field( None, description="The language the connector is written in" ) - supportsFileTransfer: Optional[bool] = False - supportsDataActivation: Optional[bool] = False + + +class ConnectorRegistryV0(BaseModel): + destinations: List[ConnectorRegistryDestinationDefinition] + sources: List[ConnectorRegistrySourceDefinition] class ConnectorReleaseCandidates(BaseModel): @@ -602,17 +603,18 @@ class Config: ) -class ConnectorRegistryDestinationDefinition(BaseModel): +class ConnectorRegistrySourceDefinition(BaseModel): class Config: extra = Extra.allow - destinationDefinitionId: UUID + sourceDefinitionId: UUID name: str dockerRepository: str dockerImageTag: str documentationUrl: str icon: Optional[str] = None iconUrl: Optional[str] = None + sourceType: Optional[SourceType] = None spec: Dict[str, Any] tombstone: Optional[bool] = Field( False, @@ -631,33 +633,31 @@ class Config: None, description="The date when this connector was first released, in yyyy-mm-dd format.", ) - tags: Optional[List[str]] = Field( - None, - description="An array of tags that describe the connector. 
E.g: language:python, keyword:rds, etc.", - ) resourceRequirements: Optional[ActorDefinitionResourceRequirements] = None protocolVersion: Optional[str] = Field( None, description="the Airbyte Protocol version supported by the connector" ) - normalizationConfig: Optional[NormalizationDestinationDefinitionConfig] = None - supportsDbt: Optional[bool] = Field( + allowedHosts: Optional[AllowedHosts] = None + suggestedStreams: Optional[SuggestedStreams] = None + maxSecondsBetweenMessages: Optional[int] = Field( None, - description="an optional flag indicating whether DBT is used in the normalization. If the flag value is NULL - DBT is not used.", + description="Number of seconds allowed between 2 airbyte protocol messages. The source will timeout if this delay is reach", + ) + erdUrl: Optional[str] = Field( + None, description="The URL where you can visualize the ERD" ) - allowedHosts: Optional[AllowedHosts] = None releases: Optional[ConnectorRegistryReleases] = None ab_internal: Optional[AirbyteInternal] = None - supportsRefreshes: Optional[bool] = False - supportsFileTransfer: Optional[bool] = False - supportsDataActivation: Optional[bool] = False generated: Optional[GeneratedFields] = None packageInfo: Optional[ConnectorPackageInfo] = None language: Optional[str] = Field( None, description="The language the connector is written in" ) + supportsFileTransfer: Optional[bool] = False + supportsDataActivation: Optional[bool] = False +ConnectorRegistryDestinationDefinition.update_forward_refs() ConnectorRegistryV0.update_forward_refs() -ConnectorRegistrySourceDefinition.update_forward_refs() ConnectorReleaseCandidates.update_forward_refs() VersionReleaseCandidate.update_forward_refs() diff --git a/bin/generate_connector_metadata_files.py b/bin/generate_connector_metadata_files.py deleted file mode 100755 index 3d24f0b52..000000000 --- a/bin/generate_connector_metadata_files.py +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) 2024 Airbyte, Inc., all 
rights reserved. - -""" -Generate Pydantic models and JSON schema for connector metadata validation. - -This script downloads metadata schema YAML files from the airbyte monorepo and generates: -1. A consolidated JSON schema file (metadata_schema.json) -2. A single Python file with all Pydantic models (models.py) generated from the JSON schema - -The generated files are used for validating connector metadata.yaml files. -""" - -import json -import re -import subprocess -import sys -import tempfile -from pathlib import Path -from typing import Any - -try: - import yaml -except ImportError: - print("Error: pyyaml is required. Install with: pip install pyyaml", file=sys.stderr) - sys.exit(1) - -OUTPUT_DIR_PATH = "airbyte_cdk/test/models/connector_metadata/generated" -AIRBYTE_REPO_URL = "https://github.com/airbytehq/airbyte.git" -SCHEMA_PATH = "airbyte-ci/connectors/metadata_service/lib/metadata_service/models/src" -DATAMODEL_CODEGEN_VERSION = "0.26.3" - - -def clone_schemas_from_github(temp_dir: Path) -> Path: - """Clone metadata schema YAML files from GitHub using sparse checkout.""" - clone_dir = temp_dir / "airbyte" - - print("Cloning metadata schemas from airbyte repo...", file=sys.stderr) - - subprocess.run( - [ - "git", - "clone", - "--depth", - "1", - "--filter=blob:none", - "--sparse", - AIRBYTE_REPO_URL, - str(clone_dir), - ], - check=True, - capture_output=True, - ) - - subprocess.run( - ["git", "-C", str(clone_dir), "sparse-checkout", "set", SCHEMA_PATH], - check=True, - capture_output=True, - ) - - schemas_dir = clone_dir / SCHEMA_PATH - print(f"Cloned schemas to {schemas_dir}", file=sys.stderr) - - return schemas_dir - - -def consolidate_yaml_schemas_to_json(yaml_dir_path: Path, output_json_path: Path) -> None: - """Consolidate all YAML schemas into a single JSON schema file.""" - schemas = {} - - for yaml_file in yaml_dir_path.glob("*.yaml"): - schema_name = yaml_file.stem - schema_content = yaml.safe_load(yaml_file.read_text()) - schemas[schema_name] = 
schema_content - - all_schema_names = set(schemas.keys()) - json_primitives = {"string", "number", "integer", "boolean", "object", "array", "null"} - - for schema_content in schemas.values(): - if isinstance(schema_content, dict) and "definitions" in schema_content: - all_schema_names.update(schema_content["definitions"].keys()) - - def fix_refs(obj: Any, in_definition: bool = False) -> Any: - """Recursively fix $ref and type references in schema objects.""" - if isinstance(obj, dict): - new_obj = {} - for key, value in obj.items(): - if (key == "$id" or key == "$schema") and in_definition: - continue - elif key == "$ref" and isinstance(value, str): - m = re.match(r"(?:.*/)?(?P<name>[^/#]+)\.yaml(?P<frag>#.*)?$", value) - if m: - schema_name = m.group("name") - frag = m.group("frag") or "" - new_obj[key] = f"#/definitions/{schema_name}{frag}" - else: - new_obj[key] = value - elif key == "type" and isinstance(value, str): - if value in all_schema_names and value not in json_primitives: - new_obj["$ref"] = f"#/definitions/{value}" - else: - new_obj[key] = value - elif key == "type" and value == "const": - pass - else: - new_obj[key] = fix_refs(value, in_definition=in_definition) - return new_obj - elif isinstance(obj, list): - return [fix_refs(item, in_definition=in_definition) for item in obj] - else: - return obj - - # Find the main schema (ConnectorMetadataDefinitionV0) - main_schema = schemas.get("ConnectorMetadataDefinitionV0") - - if main_schema: - # Create a consolidated schema preserving main schema structure - consolidated = dict(main_schema) # shallow copy - consolidated.setdefault("$schema", "http://json-schema.org/draft-07/schema#") - consolidated.setdefault("title", "Connector Metadata Schema") - consolidated.setdefault( - "description", "Consolidated JSON schema for Airbyte connector metadata validation" - ) - - consolidated_definitions = dict(consolidated.get("definitions", {})) - - # Add all schemas (including their internal definitions) as top-level definitions 
- for schema_name, schema_content in schemas.items(): - if schema_name != "ConnectorMetadataDefinitionV0": - if isinstance(schema_content, dict) and "definitions" in schema_content: - for def_name, def_content in schema_content["definitions"].items(): - consolidated_definitions[def_name] = fix_refs( - def_content, in_definition=True - ) - schema_without_defs = { - k: v for k, v in schema_content.items() if k != "definitions" - } - consolidated_definitions[schema_name] = fix_refs( - schema_without_defs, in_definition=True - ) - else: - consolidated_definitions[schema_name] = fix_refs( - schema_content, in_definition=True - ) - - consolidated["definitions"] = consolidated_definitions - consolidated = fix_refs(consolidated, in_definition=False) - - output_json_path.write_text(json.dumps(consolidated, indent=2)) - print(f"Generated consolidated JSON schema: {output_json_path}", file=sys.stderr) - else: - print( - "Warning: ConnectorMetadataDefinitionV0 not found, generating simple consolidation", - file=sys.stderr, - ) - output_json_path.write_text(json.dumps(schemas, indent=2)) - - -def generate_models_from_json_schema(json_schema_path: Path, output_file_path: Path) -> None: - """Generate Pydantic models from consolidated JSON schema.""" - print("Running datamodel-codegen via uvx...", file=sys.stderr) - - subprocess.run( - [ - "uvx", - "--from", - f"datamodel-code-generator=={DATAMODEL_CODEGEN_VERSION}", - "datamodel-codegen", - "--input", - str(json_schema_path), - "--output", - str(output_file_path), - "--input-file-type", - "jsonschema", - "--disable-timestamp", - "--enum-field-as-literal", - "one", - "--set-default-enum-member", - "--use-double-quotes", - "--remove-special-field-name-prefix", - "--field-extra-keys", - "deprecated", - "deprecation_message", - ], - check=True, - ) - - content = output_file_path.read_text() - content = content.replace("from pydantic", "from pydantic.v1") - output_file_path.write_text(content) - - print(f"Generated models: 
{output_file_path}", file=sys.stderr) - - -def main() -> None: - print("Generating connector metadata models...", file=sys.stderr) - - with tempfile.TemporaryDirectory() as temp_dir: - temp_path = Path(temp_dir) - schemas_dir = clone_schemas_from_github(temp_path) - - output_dir = Path(OUTPUT_DIR_PATH) - output_dir.mkdir(parents=True, exist_ok=True) - - print("Consolidating YAML schemas into JSON...", file=sys.stderr) - json_schema_file = output_dir / "metadata_schema.json" - consolidate_yaml_schemas_to_json(schemas_dir, json_schema_file) - - print("Generating Python models from JSON schema...", file=sys.stderr) - output_file = output_dir / "models.py" - generate_models_from_json_schema(json_schema_file, output_file) - - print("Connector metadata model generation complete!", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/unit_tests/models/connector_metadata/test_metadata_file.py b/unit_tests/models/connector_metadata/test_metadata_file.py new file mode 100644 index 000000000..53998001a --- /dev/null +++ b/unit_tests/models/connector_metadata/test_metadata_file.py @@ -0,0 +1,154 @@ +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. 
+"""Tests for metadata file validation.""" + +import json +from pathlib import Path +from unittest.mock import Mock, patch + +import pytest + +from airbyte_cdk.models.connector_metadata import ( + ValidationResult, + get_metadata_schema, + validate_metadata_file, +) + + +@pytest.fixture +def test_schema_path(): + """Path to test schema file.""" + return Path(__file__).parent / "test_schema.json" + + +@pytest.fixture +def test_schema(test_schema_path): + """Load test schema.""" + return json.loads(test_schema_path.read_text()) + + +@pytest.fixture +def valid_metadata_file(tmp_path): + """Create a valid metadata.yaml file.""" + metadata_path = tmp_path / "metadata.yaml" + metadata_path.write_text( + """ +data: + dockerRepository: airbyte/source-test + dockerImageTag: 0.1.0 + tags: + - language:python +""" + ) + return metadata_path + + +@pytest.fixture +def invalid_metadata_file(tmp_path): + """Create an invalid metadata.yaml file (missing required field).""" + metadata_path = tmp_path / "metadata.yaml" + metadata_path.write_text( + """ +data: + dockerRepository: airbyte/source-test +""" + ) + return metadata_path + + +class TestGetMetadataSchema: + """Tests for get_metadata_schema function.""" + + def test_load_from_file_path(self, test_schema_path, test_schema): + """Test loading schema from file path.""" + schema = get_metadata_schema(test_schema_path) + assert schema == test_schema + + def test_load_from_string_path(self, test_schema_path, test_schema): + """Test loading schema from string path.""" + schema = get_metadata_schema(str(test_schema_path)) + assert schema == test_schema + + def test_file_not_found(self, tmp_path): + """Test error when schema file doesn't exist.""" + with pytest.raises(FileNotFoundError): + get_metadata_schema(tmp_path / "nonexistent.json") + + @patch("airbyte_cdk.models.connector_metadata.metadata_file.urlopen") + def test_load_from_url(self, mock_urlopen, test_schema): + """Test loading schema from URL.""" + mock_response = Mock() + 
mock_response.read.return_value = json.dumps(test_schema).encode("utf-8") + mock_response.__enter__ = Mock(return_value=mock_response) + mock_response.__exit__ = Mock(return_value=False) + mock_urlopen.return_value = mock_response + + schema = get_metadata_schema("https://example.com/schema.json") + assert schema == test_schema + mock_urlopen.assert_called_once() + + @patch("airbyte_cdk.models.connector_metadata.metadata_file.urlopen") + def test_url_fetch_error(self, mock_urlopen): + """Test error when URL fetch fails.""" + mock_urlopen.side_effect = Exception("Network error") + + with pytest.raises(RuntimeError, match="Failed to fetch schema"): + get_metadata_schema("https://example.com/schema.json") + + +class TestValidateMetadataFile: + """Tests for validate_metadata_file function.""" + + def test_valid_metadata(self, valid_metadata_file, test_schema_path): + """Test validation of valid metadata file.""" + result = validate_metadata_file(valid_metadata_file, test_schema_path) + assert isinstance(result, ValidationResult) + assert result.valid is True + assert len(result.errors) == 0 + assert result.metadata is not None + + def test_invalid_metadata_missing_field(self, invalid_metadata_file, test_schema_path): + """Test validation of invalid metadata file (missing required field).""" + result = validate_metadata_file(invalid_metadata_file, test_schema_path) + assert isinstance(result, ValidationResult) + assert result.valid is False + assert len(result.errors) > 0 + assert result.metadata is not None + + def test_file_not_found(self, tmp_path, test_schema_path): + """Test validation when metadata file doesn't exist.""" + result = validate_metadata_file(tmp_path / "nonexistent.yaml", test_schema_path) + assert result.valid is False + assert len(result.errors) == 1 + assert result.errors[0]["type"] == "file_not_found" + + def test_invalid_yaml(self, tmp_path, test_schema_path): + """Test validation when YAML is malformed.""" + metadata_path = tmp_path / 
"metadata.yaml" + metadata_path.write_text("invalid: yaml: content: [") + result = validate_metadata_file(metadata_path, test_schema_path) + assert result.valid is False + assert len(result.errors) == 1 + assert result.errors[0]["type"] == "yaml_parse_error" + + def test_missing_data_field(self, tmp_path, test_schema_path): + """Test validation when 'data' field is missing.""" + metadata_path = tmp_path / "metadata.yaml" + metadata_path.write_text("notdata: {}") + result = validate_metadata_file(metadata_path, test_schema_path) + assert result.valid is False + assert len(result.errors) == 1 + assert result.errors[0]["type"] == "missing_field" + + def test_schema_load_error(self, valid_metadata_file, tmp_path): + """Test validation when schema can't be loaded.""" + result = validate_metadata_file(valid_metadata_file, tmp_path / "nonexistent.json") + assert result.valid is False + assert len(result.errors) == 1 + assert result.errors[0]["type"] == "schema_load_error" + + @patch("airbyte_cdk.models.connector_metadata.metadata_file.get_metadata_schema") + def test_default_schema_url(self, mock_get_schema, valid_metadata_file, test_schema): + """Test that default schema URL is used when none provided.""" + mock_get_schema.return_value = test_schema + result = validate_metadata_file(valid_metadata_file) + mock_get_schema.assert_called_once_with(None) diff --git a/unit_tests/models/connector_metadata/test_schema.json b/unit_tests/models/connector_metadata/test_schema.json new file mode 100644 index 000000000..7797d9cf0 --- /dev/null +++ b/unit_tests/models/connector_metadata/test_schema.json @@ -0,0 +1,25 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "required": ["data"], + "properties": { + "data": { + "type": "object", + "required": ["dockerRepository", "dockerImageTag"], + "properties": { + "dockerRepository": { + "type": "string" + }, + "dockerImageTag": { + "type": "string" + }, + "tags": { + "type": "array", + "items": { + 
"type": "string" + } + } + } + } + } +}