From 53e67714ec90d5d5a39c83998d6bc89996c4b9d8 Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Thu, 13 Nov 2025 13:19:32 +0100 Subject: [PATCH 1/9] Attempt to fix glue role assumption for iceberg --- .../source/iceberg/iceberg_common.py | 74 ++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 593b9af1327ad..61bc2b0b6ef95 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -3,10 +3,26 @@ from dataclasses import dataclass, field from typing import Any, Dict, Optional +import boto3 from humanfriendly import format_timespan from pydantic import Field, field_validator -from pyiceberg.catalog import Catalog, load_catalog +from pyiceberg.catalog import BOTOCORE_SESSION, Catalog, load_catalog +from pyiceberg.catalog.glue import ( + GLUE_ACCESS_KEY_ID, + GLUE_PROFILE_NAME, + GLUE_REGION, + GLUE_SECRET_ACCESS_KEY, + GLUE_SESSION_TOKEN, +) from pyiceberg.catalog.rest import RestCatalog +from pyiceberg.io import ( + AWS_ACCESS_KEY_ID, + AWS_REGION, + AWS_ROLE_ARN, + AWS_SECRET_ACCESS_KEY, + AWS_SESSION_TOKEN, +) +from pyiceberg.utils.properties import get_first_property_value from requests.adapters import HTTPAdapter from sortedcontainers import SortedList from urllib3.util import Retry @@ -32,6 +48,8 @@ DEFAULT_REST_TIMEOUT = 120 DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1} +GLUE_ROLE_ARN = "glue.role-arn" + class TimeoutHTTPAdapter(HTTPAdapter): def __init__(self, *args, **kwargs): @@ -168,6 +186,60 @@ def get_catalog(self) -> Catalog: logger.debug( "Initializing the catalog %s with config: %s", catalog_name, catalog_config ) + + # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed + if catalog_config.get("type") == "glue": + role_to_assume = get_first_property_value( + catalog_config, GLUE_ROLE_ARN, AWS_ROLE_ARN + ) + if role_to_assume: + logger.debug( + "Recognized role ARN in glue catalog config, attempting to workaround pyiceberg limitation in role assumption for the glue client" + ) + session = boto3.Session( + profile_name=catalog_config.get(GLUE_PROFILE_NAME), + region_name=get_first_property_value( + catalog_config, GLUE_REGION, AWS_REGION + ), + botocore_session=catalog_config.get(BOTOCORE_SESSION), + aws_access_key_id=get_first_property_value( + catalog_config, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID + ), + aws_secret_access_key=get_first_property_value( + catalog_config, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY + ), + aws_session_token=get_first_property_value( + catalog_config, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN + ), + ) + + sts_client = session.client("sts") + identity = sts_client.get_caller_identity() + logger.debug( + f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}" + ) + + # below might fail if such duration is not allowed per policies + try: + response = sts_client.assume_role( + RoleArn=role_to_assume, + RoleSessionName="session", + DurationSeconds=43200, + ) + except sts_client.exceptions.ClientError: + # Fallback to default duration + response = sts_client.assume_role( + RoleArn=role_to_assume, RoleSessionName="session" + ) + logger.debug(f"Assumed role: {response['AssumedRoleUser']}") + creds = response["Credentials"] + catalog_config[GLUE_ACCESS_KEY_ID] = creds["AccessKeyId"] + catalog_config[GLUE_SECRET_ACCESS_KEY] = creds["SecretAccessKey"] + catalog_config[GLUE_SESSION_TOKEN] = creds["SessionToken"] + catalog_config[GLUE_REGION] = response["AssumedRoleUser"]["Arn"].split( + ":" + )[4] + catalog = load_catalog(name=catalog_name, **catalog_config) if isinstance(catalog, RestCatalog): logger.debug( From efab896d1e64215158b718bdf258cbaf24628ebe Mon Sep 17 00:00:00 2001 From: pedro93 Date: Thu, 13 Nov 2025 12:45:47 +0000 Subject: [PATCH 2/9] Remove glue region auto-detection attempt --- .../src/datahub/ingestion/source/iceberg/iceberg_common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 61bc2b0b6ef95..34d89b4f14570 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -236,9 +236,6 @@ def get_catalog(self) -> Catalog: catalog_config[GLUE_ACCESS_KEY_ID] = creds["AccessKeyId"] catalog_config[GLUE_SECRET_ACCESS_KEY] = creds["SecretAccessKey"] catalog_config[GLUE_SESSION_TOKEN] = creds["SessionToken"] - catalog_config[GLUE_REGION] = response["AssumedRoleUser"]["Arn"].split( - ":" - )[4] catalog = load_catalog(name=catalog_name, **catalog_config) if isinstance(catalog, RestCatalog): From d5172c121d6ac3d05365c98a6be775a47f744596 Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Thu, 13 Nov 2025 16:50:04 +0100 Subject: [PATCH 3/9] Further improvements --- .../source/iceberg/iceberg_common.py | 98 +++++++++++-------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 34d89b4f14570..87a2cc29f50f4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -172,53 +172,54 @@ def is_profiling_enabled(self) -> bool: self.profiling.operation_config ) - def get_catalog(self) -> Catalog: - """Returns the Iceberg catalog instance as configured by the `catalog` dictionary. - - Returns: - Catalog: Iceberg catalog instance. - """ - if not self.catalog: - raise ValueError("No catalog configuration found") - - # Retrieve the dict associated with the one catalog entry - catalog_name, catalog_config = next(iter(self.catalog.items())) - logger.debug( - "Initializing the catalog %s with config: %s", catalog_name, catalog_config + def _custom_glue_catalog_handling(self, catalog_config: Dict[str, Any]) -> None: + role_to_assume = get_first_property_value( + catalog_config, GLUE_ROLE_ARN, AWS_ROLE_ARN ) + if role_to_assume: + logger.debug( + "Recognized role ARN in glue catalog config, attempting to workaround pyiceberg limitation in role assumption for the glue client" + ) + session = boto3.Session( + profile_name=catalog_config.get(GLUE_PROFILE_NAME), + region_name=get_first_property_value( + catalog_config, GLUE_REGION, AWS_REGION + ), + botocore_session=catalog_config.get(BOTOCORE_SESSION), + aws_access_key_id=get_first_property_value( + catalog_config, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID + ), + aws_secret_access_key=get_first_property_value( + catalog_config, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY + ), + aws_session_token=get_first_property_value( + catalog_config, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN + ), + ) - # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed - if catalog_config.get("type") == "glue": - role_to_assume = get_first_property_value( - catalog_config, GLUE_ROLE_ARN, AWS_ROLE_ARN + sts_client = session.client("sts") + identity = sts_client.get_caller_identity() + logger.debug( + f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}" ) - if role_to_assume: - logger.debug( - "Recognized role ARN in glue catalog config, attempting to workaround pyiceberg limitation in role assumption for the glue client" + current_role_name = None + if ":assumed-role/" in identity["Arn"]: + current_role_name = ( + identity["Arn"].split(":assumed-role/")[1].split("/")[0] ) - session = boto3.Session( - profile_name=catalog_config.get(GLUE_PROFILE_NAME), - region_name=get_first_property_value( - catalog_config, GLUE_REGION, AWS_REGION - ), - botocore_session=catalog_config.get(BOTOCORE_SESSION), - aws_access_key_id=get_first_property_value( - catalog_config, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID - ), - aws_secret_access_key=get_first_property_value( - catalog_config, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY - ), - aws_session_token=get_first_property_value( - catalog_config, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN - ), + + maybe_target_role_name = role_to_assume.split("/") + if len(maybe_target_role_name) < 2: + logger.warning( + f"Expected target role to be proper ARN, it doesn't appear to be so: {role_to_assume}, continuing nonetheless" ) + target_role_name = maybe_target_role_name[-1] - sts_client = session.client("sts") - identity = sts_client.get_caller_identity() + if current_role_name == target_role_name: logger.debug( - f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}" + "Current role and the role we wanted to assume are the same, continuing without further assumption steps" ) - + else: # below might fail if such duration is not allowed per policies try: response = sts_client.assume_role( @@ -237,6 +238,25 @@ def get_catalog(self) -> Catalog: catalog_config[GLUE_SECRET_ACCESS_KEY] = creds["SecretAccessKey"] catalog_config[GLUE_SESSION_TOKEN] = creds["SessionToken"] + def get_catalog(self) -> Catalog: + """Returns the Iceberg catalog instance as configured by the `catalog` dictionary. + + Returns: + Catalog: Iceberg catalog instance. + """ + if not self.catalog: + raise ValueError("No catalog configuration found") + + # Retrieve the dict associated with the one catalog entry + catalog_name, catalog_config = next(iter(self.catalog.items())) + logger.debug( + "Initializing the catalog %s with config: %s", catalog_name, catalog_config + ) + + # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed + if catalog_config.get("type") == "glue": + self._custom_glue_catalog_handling(catalog_config) + catalog = load_catalog(name=catalog_name, **catalog_config) if isinstance(catalog, RestCatalog): logger.debug( From 1c2d21c9af7c05678575313832047a0fa5da780f Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Thu, 13 Nov 2025 17:51:23 +0100 Subject: [PATCH 4/9] More verbosity --- .../src/datahub/ingestion/source/iceberg/iceberg_common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 87a2cc29f50f4..37a4a527b611f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -220,6 +220,7 @@ def _custom_glue_catalog_handling(self, catalog_config: Dict[str, Any]) -> None: "Current role and the role we wanted to assume are the same, continuing without further assumption steps" ) else: + logger.debug(f"Assuming the role {role_to_assume}") # below might fail if such duration is not allowed per policies try: response = sts_client.assume_role( From 6a2e9c3a53a292bd344ecd3b86e40ea3e4f97430 Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Thu, 13 Nov 2025 20:33:20 +0100 Subject: [PATCH 5/9] Graceful handling of OSError as warnings --- .../src/datahub/ingestion/source/iceberg/iceberg.py | 7 +++++++ metadata-ingestion/tests/unit/test_iceberg.py | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py index 3d9cdffb0a9c7..1a18bf5ef8206 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py @@ -340,6 +340,13 @@ def _try_processing_dataset( context=dataset_name, exc=e, ) + except OSError as e: + self.report.warning( + title="Can't read manifest", + message="Provided manifest path appeared impossible to read", + context=dataset_name, + exc=e, + ) except ValueError as e: if "Could not initialize FileIO" not in str(e): raise diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py index 69bd9d03cc397..5f4c4eba153f7 100644 --- a/metadata-ingestion/tests/unit/test_iceberg.py +++ b/metadata-ingestion/tests/unit/test_iceberg.py @@ -1359,6 +1359,9 @@ def _raise_server_error(_: Catalog) -> Never: def _raise_rest_error(_: Catalog) -> Never: raise RESTError() + def _raise_os_error(_: Catalog) -> Never: + raise OSError() + def _raise_fileio_error(_: Catalog) -> Never: raise ValueError("Could not initialize FileIO: abc.dummy.fileio") @@ -1424,6 +1427,7 @@ def _raise_fileio_error(_: Catalog) -> Never: "table9": _raise_server_error, "table10": _raise_fileio_error, "table11": _raise_rest_error, + "table12": _raise_os_error, } } ) @@ -1451,7 +1455,7 @@ def _raise_fileio_error(_: Catalog) -> Never: expected_wu_urns, ) assert ( - source.report.warnings.total_elements == 6 + source.report.warnings.total_elements == 7 ) # ServerError and RESTError exceptions are caught together assert source.report.failures.total_elements == 0 assert source.report.tables_scanned == 4 From ab68f1ae349b12c68b629bae1f2700acfa1753a2 Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Sun, 16 Nov 2025 22:18:32 +0100 Subject: [PATCH 6/9] Added comprehensive tests, refactored logic --- .../source/iceberg/iceberg_common.py | 23 +- metadata-ingestion/tests/unit/test_iceberg.py | 426 ++++++++++++++++++ 2 files changed, 439 insertions(+), 10 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 37a4a527b611f..c6fdc3356d7c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -202,20 +202,23 @@ def _custom_glue_catalog_handling(self, catalog_config: Dict[str, Any]) -> None: logger.debug( f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}" ) - current_role_name = None - if ":assumed-role/" in identity["Arn"]: - current_role_name = ( - identity["Arn"].split(":assumed-role/")[1].split("/")[0] - ) - maybe_target_role_name = role_to_assume.split("/") - if len(maybe_target_role_name) < 2: + current_role_arn = None + try: + if ":assumed-role/" in identity["Arn"]: + current_role_arn = ( + "/".join(identity["Arn"].split("/")[0:-1]) + .replace(":assumed-role/", ":role/") + .replace("arn:aws:sts", "arn:aws:iam") + ) + logger.debug(f"Deducted current role: {current_role_arn}") + except Exception as e: logger.warning( - f"Expected target role to be proper ARN, it doesn't appear to be so: {role_to_assume}, continuing nonetheless" + "We couldn't convert currently assumed role to 'role' format so that we could compare " + f"it with the target role, will try to assume the target role nonetheless, exception: {e}" ) - target_role_name = maybe_target_role_name[-1] - if current_role_name == target_role_name: + if current_role_arn == role_to_assume: logger.debug( "Current role and the role we wanted to assume are the same, continuing without further assumption steps" ) diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py index 5f4c4eba153f7..bec6b1555b75f 100644 --- a/metadata-ingestion/tests/unit/test_iceberg.py +++ b/metadata-ingestion/tests/unit/test_iceberg.py @@ -15,6 +15,7 @@ from unittest.mock import patch import pytest +from botocore.exceptions import ClientError from pydantic import ValidationError from pyiceberg.catalog import Catalog from pyiceberg.exceptions import ( @@ -1601,3 +1602,428 @@ def test_ingesting_namespace_properties() -> None: ].customProperties == custom_properties ) + + +def test_glue_catalog_no_role_assumption() -> None: + """Test that when no role ARN is provided, no role assumption occurs.""" + catalog_config = { + "test_glue": { + "type": "glue", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch("datahub.ingestion.source.iceberg.iceberg_common.boto3") as mock_boto3, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + config.get_catalog() + + # To assume role we first need a boto3 Session object, since we are not getting it, there is guarantee + # we are not assuming role neither + mock_boto3.Session.assert_not_called() + + +def test_glue_catalog_role_assumption_same_role() -> None: + """Test that when current role matches target role, no assumption occurs.""" + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:sts::123456789012:assumed-role/MyRole/session-name", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + config.get_catalog() + mock_sts.get_caller_identity.assert_called_once() + + # Should NOT call assume_role since we're already in the target role + mock_sts.assume_role.assert_not_called() + + +def test_glue_catalog_role_assumption_same_role_name_different_account() -> None: + """Test that when current role matches target role, no assumption occurs.""" + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:sts::345678249436:assumed-role/MyRole/session", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + mock_sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "ASIAIOSFODNN7EXAMPLE", + "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY", + "SessionToken": "FwoGZXIvYXdzEBYaDH...", + "Expiration": "2024-01-01T00:00:00Z", + }, + "AssumedRoleUser": { + "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session", + "Arn": "arn:aws:sts::123456789012:assumed-role/MyRole/session", + }, + } + + config.get_catalog() + mock_sts.get_caller_identity.assert_called_once() + + mock_sts.assume_role.assert_called_once_with( + RoleArn="arn:aws:iam::123456789012:role/MyRole", + RoleSessionName="session", + DurationSeconds=43200, + ) + + # Verify credentials were updated in catalog config + updated_config = config.catalog["test_glue"] + assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE" + assert ( + updated_config["glue.secret-access-key"] + == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY" + ) + assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." + + +def test_glue_catalog_role_assumption_different_role() -> None: + """Test successful role assumption when current role differs from target.""" + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "s3.region": "us-west-2", + "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE", + "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + mock_sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "ASIAIOSFODNN7EXAMPLE", + "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY", + "SessionToken": "FwoGZXIvYXdzEBYaDH...", + "Expiration": "2024-01-01T00:00:00Z", + }, + "AssumedRoleUser": { + "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session", + "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session", + }, + } + + config.get_catalog() + + mock_sts.assume_role.assert_called_once_with( + RoleArn="arn:aws:iam::123456789012:role/TargetRole", + RoleSessionName="session", + DurationSeconds=43200, + ) + + updated_config = config.catalog["test_glue"] + assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE" + assert ( + updated_config["glue.secret-access-key"] + == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY" + ) + assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." + + +def test_glue_catalog_role_assumption_fallback_duration() -> None: + """Test role assumption falls back to default duration on ClientError.""" + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + mock_sts.exceptions.ClientError = ClientError + + # First call with long duration fails, second succeeds + mock_sts.assume_role.side_effect = [ + ClientError( + { + "Error": { + "Code": "ValidationError", + "Message": "DurationSeconds exceeds maximum", + } + }, + "AssumeRole", + ), + { + "Credentials": { + "AccessKeyId": "ASIAIOSFODNN7EXAMPLE", + "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY", + "SessionToken": "FwoGZXIvYXdzEBYaDH...", + "Expiration": "2024-01-01T00:00:00Z", + }, + "AssumedRoleUser": { + "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session", + "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session", + }, + }, + ] + + config.get_catalog() + + # Should call assume_role twice: once with long duration, once without + assert mock_sts.assume_role.call_count == 2 + + # First call with long duration + assert mock_sts.assume_role.call_args_list[0] == ( + (), + { + "RoleArn": "arn:aws:iam::123456789012:role/TargetRole", + "RoleSessionName": "session", + "DurationSeconds": 43200, + }, + ) + + # Second call without duration (default) + assert mock_sts.assume_role.call_args_list[1] == ( + (), + { + "RoleArn": "arn:aws:iam::123456789012:role/TargetRole", + "RoleSessionName": "session", + }, + ) + + +def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None: + """Test that client.role-arn property is also recognized for role assumption.""" + catalog_config = { + "test_glue": { + "type": "glue", + "client.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "client.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + mock_sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "ASIAIOSFODNN7EXAMPLE", + "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY", + "SessionToken": "FwoGZXIvYXdzEBYaDH...", + "Expiration": "2024-01-01T00:00:00Z", + }, + "AssumedRoleUser": { + "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session", + "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session", + }, + } + + config.get_catalog() + + # Should recognize client.role-arn and perform role assumption + mock_sts.assume_role.assert_called_once() + + updated_config = config.catalog["test_glue"] + assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE" + assert ( + updated_config["glue.secret-access-key"] + == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY" + ) + assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." + + +def test_glue_catalog_role_assumption_non_assumed_role_identity() -> None: + """Test role assumption when current identity is not an assumed role (e.g., IAM user).""" + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + # Current identity is an IAM user, not an assumed role + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:iam::123456789012:user/my-user", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + mock_sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "ASIAIOSFODNN7EXAMPLE", + "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY", + "SessionToken": "FwoGZXIvYXdzEBYaDH...", + "Expiration": "2024-01-01T00:00:00Z", + }, + "AssumedRoleUser": { + "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session", + "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session", + }, + } + + config.get_catalog() + + mock_sts.assume_role.assert_called_once() + + updated_config = config.catalog["test_glue"] + assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE" + assert ( + updated_config["glue.secret-access-key"] + == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY" + ) + assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." + + +def test_glue_catalog_with_all_credential_parameters() -> None: + """Test that all credential parameters are passed correctly to boto3 Session.""" + role_to_assume = "arn:aws:iam::123456789012:role/TargetRole" + + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": role_to_assume, + "glue.region": "us-west-2", + "glue.profile-name": "my-profile", + "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE", + "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + "glue.session-token": "FwoGZXIvYXdzEB...", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with ( + patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" + ) as mock_boto3_session, + patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + ): + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + + mock_sts.get_caller_identity.return_value = { + "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", + "UserId": "AIDACKCEVSQ6C2EXAMPLE", + "Account": "123456789012", + } + + mock_sts.assume_role.return_value = { + "Credentials": { + "AccessKeyId": "ASIAIOSFODNN7EXAMPLE2", + "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY2", + "SessionToken": "FwoGZXIvYXdzEBYaDH2...", + "Expiration": "2024-01-01T00:00:00Z", + }, + "AssumedRoleUser": { + "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session", + "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session", + }, + } + + config.get_catalog() + + mock_boto3_session.assert_called_once_with( + profile_name="my-profile", + region_name="us-west-2", + botocore_session=None, + aws_access_key_id="AKIAIOSFODNN7EXAMPLE", + aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + aws_session_token="FwoGZXIvYXdzEB...", + ) + + mock_sts.assume_role.assert_called_once_with( + RoleArn=role_to_assume, + RoleSessionName="session", + DurationSeconds=43200, + ) + + updated_config = config.catalog["test_glue"] + assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE2" + assert ( + updated_config["glue.secret-access-key"] + == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY2" + ) + assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH2..." From f3479bc96fe6da13068f87c0519d4fe8fa127ffc Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Sun, 16 Nov 2025 23:50:43 +0100 Subject: [PATCH 7/9] Polishing tests --- metadata-ingestion/tests/unit/test_iceberg.py | 291 ++++++++---------- 1 file changed, 132 insertions(+), 159 deletions(-) diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py index bec6b1555b75f..624a58dde8d42 100644 --- a/metadata-ingestion/tests/unit/test_iceberg.py +++ b/metadata-ingestion/tests/unit/test_iceberg.py @@ -1604,46 +1604,66 @@ def test_ingesting_namespace_properties() -> None: ) -def test_glue_catalog_no_role_assumption() -> None: - """Test that when no role ARN is provided, no role assumption occurs.""" - catalog_config = { - "test_glue": { - "type": "glue", - "s3.region": "us-west-2", - } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch("datahub.ingestion.source.iceberg.iceberg_common.boto3") as mock_boto3, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), - ): - config.get_catalog() - - # To assume role we first need a boto3 Session object, since we are not getting it, there is guarantee - # we are not assuming role neither - mock_boto3.Session.assert_not_called() +class TestGlueCatalogRoleAssumption: + """ + This class tests logic we have to workaround PyIceberg library bug, which causes it to not assume indicated IAM role + when connecting to a Glue catalog + """ + @pytest.fixture(autouse=True) + def mock_load_catalog(self): + """ + get_catalog function, which we are testing in this class, would call load_catalog, which would in turn + make a call to boto3.Session, it would bloat our tests, therefore we are mocking it for all of them + """ + with patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"): + yield -def test_glue_catalog_role_assumption_same_role() -> None: - """Test that when current role matches target role, no assumption occurs.""" - catalog_config = { - "test_glue": { - "type": "glue", - "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole", - "s3.region": "us-west-2", - } - } - config = IcebergSourceConfig(catalog=catalog_config) + @pytest.fixture + def mock_boto3_session(self): + """Fixture to mock boto3.Session and return configured mocks. - with ( - patch( + Returns: + tuple: (mock_boto3_session, mock_sts_client) for use in tests + """ + with patch( "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), - ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + ) as mock_boto3_session: + mock_session_instance = mock_boto3_session.return_value + mock_sts = mock_session_instance.client.return_value + yield mock_boto3_session, mock_sts + + def test_no_role_assumption(self): + """Test that when no role ARN is provided, no role assumption occurs.""" + catalog_config = { + "test_glue": { + "type": "glue", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) + + with patch( + "datahub.ingestion.source.iceberg.iceberg_common.boto3" + ) as mock_boto3: + config.get_catalog() + + # To assume role we first need a boto3 Session object, since we are not getting it, there is guarantee + # we are not assuming role neither + mock_boto3.Session.assert_not_called() + + def test_same_role_no_assumption(self, mock_boto3_session): + """Test that when current role matches target role, no assumption occurs.""" + mock_session, mock_sts = mock_boto3_session + + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:sts::123456789012:assumed-role/MyRole/session-name", @@ -1657,26 +1677,18 @@ def test_glue_catalog_role_assumption_same_role() -> None: # Should NOT call assume_role since we're already in the target role mock_sts.assume_role.assert_not_called() + def test_same_role_name_different_account(self, mock_boto3_session): + """Test that when current role name matches but account differs, assumption occurs.""" + mock_session, mock_sts = mock_boto3_session -def test_glue_catalog_role_assumption_same_role_name_different_account() -> None: - """Test that when current role matches target role, no assumption occurs.""" - catalog_config = { - "test_glue": { - "type": "glue", - "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole", - "s3.region": "us-west-2", + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole", + "s3.region": "us-west-2", + } } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch( - "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), - ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + config = IcebergSourceConfig(catalog=catalog_config) mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:sts::345678249436:assumed-role/MyRole/session", @@ -1715,28 +1727,20 @@ def test_glue_catalog_role_assumption_same_role_name_different_account() -> None ) assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." - -def test_glue_catalog_role_assumption_different_role() -> None: - """Test successful role assumption when current role differs from target.""" - catalog_config = { - "test_glue": { - "type": "glue", - "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", - "s3.region": "us-west-2", - "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE", - "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + def test_different_role_assumption(self, mock_boto3_session): + """Test successful role assumption when current role differs from target.""" + mock_session, mock_sts = mock_boto3_session + + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "s3.region": "us-west-2", + "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE", + "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + } } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch( - "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), - ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + config = IcebergSourceConfig(catalog=catalog_config) mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", @@ -1773,26 +1777,18 @@ def test_glue_catalog_role_assumption_different_role() -> None: ) assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." + def test_fallback_duration(self, mock_boto3_session): + """Test role assumption falls back to default duration on ClientError.""" + mock_session, mock_sts = mock_boto3_session -def test_glue_catalog_role_assumption_fallback_duration() -> None: - """Test role assumption falls back to default duration on ClientError.""" - catalog_config = { - "test_glue": { - "type": "glue", - "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", - "s3.region": "us-west-2", + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "s3.region": "us-west-2", + } } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch( - "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), - ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + config = IcebergSourceConfig(catalog=catalog_config) mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", @@ -1851,26 +1847,20 @@ def test_glue_catalog_role_assumption_fallback_duration() -> None: }, ) - -def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None: - """Test that client.role-arn property is also recognized for role assumption.""" - catalog_config = { - "test_glue": { - "type": "glue", - "client.role-arn": "arn:aws:iam::123456789012:role/TargetRole", - "client.region": "us-west-2", - } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch( - "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + def test_glue_catalog_role_assumption_with_aws_role_arn_property( + self, mock_boto3_session ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + """Test that client.role-arn property is also recognized for role assumption.""" + mock_session, mock_sts = mock_boto3_session + + catalog_config = { + "test_glue": { + "type": "glue", + "client.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "client.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", @@ -1893,7 +1883,6 @@ def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None: config.get_catalog() - # Should recognize client.role-arn and perform role assumption mock_sts.assume_role.assert_called_once() updated_config = config.catalog["test_glue"] @@ -1904,28 +1893,21 @@ def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None: ) assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." - -def test_glue_catalog_role_assumption_non_assumed_role_identity() -> None: - """Test role assumption when current identity is not an assumed role (e.g., IAM user).""" - catalog_config = { - "test_glue": { - "type": "glue", - "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", - "s3.region": "us-west-2", - } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch( - "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), + def test_glue_catalog_role_assumption_non_assumed_role_identity( + self, mock_boto3_session ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + """Test role assumption when current identity is not an assumed role (e.g., IAM user).""" + mock_session, mock_sts = mock_boto3_session + + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole", + "s3.region": "us-west-2", + } + } + config = IcebergSourceConfig(catalog=catalog_config) - # Current identity is an IAM user, not an assumed role mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:iam::123456789012:user/my-user", "UserId": "AIDACKCEVSQ6C2EXAMPLE", @@ -1957,32 +1939,23 @@ def test_glue_catalog_role_assumption_non_assumed_role_identity() -> None: ) assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..." - -def test_glue_catalog_with_all_credential_parameters() -> None: - """Test that all credential parameters are passed correctly to boto3 Session.""" - role_to_assume = "arn:aws:iam::123456789012:role/TargetRole" - - catalog_config = { - "test_glue": { - "type": "glue", - "glue.role-arn": role_to_assume, - "glue.region": "us-west-2", - "glue.profile-name": "my-profile", - "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE", - "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", - "glue.session-token": "FwoGZXIvYXdzEB...", + def test_glue_catalog_with_all_credential_parameters(self, mock_boto3_session): + """Test that all credential parameters are passed correctly to boto3 Session.""" + mock_session, mock_sts = mock_boto3_session + role_to_assume = "arn:aws:iam::123456789012:role/TargetRole" + + catalog_config = { + "test_glue": { + "type": "glue", + "glue.role-arn": role_to_assume, + "glue.region": "us-west-2", + "glue.profile-name": "my-profile", + "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE", + "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + "glue.session-token": "FwoGZXIvYXdzEB...", + } } - } - config = IcebergSourceConfig(catalog=catalog_config) - - with ( - patch( - "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session" - ) as mock_boto3_session, - patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"), - ): - mock_session_instance = mock_boto3_session.return_value - mock_sts = mock_session_instance.client.return_value + config = IcebergSourceConfig(catalog=catalog_config) mock_sts.get_caller_identity.return_value = { "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session", @@ -2005,7 +1978,7 @@ def test_glue_catalog_with_all_credential_parameters() -> None: config.get_catalog() - mock_boto3_session.assert_called_once_with( + mock_session.assert_called_once_with( profile_name="my-profile", region_name="us-west-2", botocore_session=None, From 2a9f1c873d335565526482de2c3120de2a34d121 Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Mon, 17 Nov 2025 00:11:23 +0100 Subject: [PATCH 8/9] Reduced log verbosity --- .../src/datahub/ingestion/source/iceberg/iceberg_common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index c6fdc3356d7c2..40c0947d00c97 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -253,9 +253,7 @@ def get_catalog(self) -> Catalog: # Retrieve the dict associated with the one catalog entry catalog_name, catalog_config = next(iter(self.catalog.items())) - logger.debug( - "Initializing the catalog %s with config: %s", catalog_name, catalog_config - ) + logger.debug("Initializing the catalog %s", catalog_name) # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed if catalog_config.get("type") == "glue": From c5a60ef85475930d1f67e44317611393625d5266 Mon Sep 17 00:00:00 2001 From: Piotr Skrydalewicz Date: Mon, 17 Nov 2025 00:19:10 +0100 Subject: [PATCH 9/9] Added link to the issue in pyiceberg github --- .../src/datahub/ingestion/source/iceberg/iceberg_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py index 40c0947d00c97..dfd198c87a714 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py @@ -255,7 +255,8 @@ def get_catalog(self) -> Catalog: catalog_name, catalog_config = next(iter(self.catalog.items())) logger.debug("Initializing the catalog %s", catalog_name) - # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed + # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, + # remove this code once pyiceberg is fixed, raised issue: https://github.com/apache/iceberg-python/issues/2747 if catalog_config.get("type") == "glue": self._custom_glue_catalog_handling(catalog_config)