From 53e67714ec90d5d5a39c83998d6bc89996c4b9d8 Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Thu, 13 Nov 2025 13:19:32 +0100
Subject: [PATCH 1/9] Attempt to fix glue role assumption for iceberg

---
 .../source/iceberg/iceberg_common.py          | 74 ++++++++++++++++++-
 1 file changed, 73 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index 593b9af1327ad..61bc2b0b6ef95 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -3,10 +3,26 @@
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional
 
+import boto3
 from humanfriendly import format_timespan
 from pydantic import Field, field_validator
-from pyiceberg.catalog import Catalog, load_catalog
+from pyiceberg.catalog import BOTOCORE_SESSION, Catalog, load_catalog
+from pyiceberg.catalog.glue import (
+    GLUE_ACCESS_KEY_ID,
+    GLUE_PROFILE_NAME,
+    GLUE_REGION,
+    GLUE_SECRET_ACCESS_KEY,
+    GLUE_SESSION_TOKEN,
+)
 from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.io import (
+    AWS_ACCESS_KEY_ID,
+    AWS_REGION,
+    AWS_ROLE_ARN,
+    AWS_SECRET_ACCESS_KEY,
+    AWS_SESSION_TOKEN,
+)
+from pyiceberg.utils.properties import get_first_property_value
 from requests.adapters import HTTPAdapter
 from sortedcontainers import SortedList
 from urllib3.util import Retry
@@ -32,6 +48,8 @@
 DEFAULT_REST_TIMEOUT = 120
 DEFAULT_REST_RETRY_POLICY = {"total": 3, "backoff_factor": 0.1}
 
+GLUE_ROLE_ARN = "glue.role-arn"
+
 
 class TimeoutHTTPAdapter(HTTPAdapter):
     def __init__(self, *args, **kwargs):
@@ -168,6 +186,60 @@ def get_catalog(self) -> Catalog:
         logger.debug(
             "Initializing the catalog %s with config: %s", catalog_name, catalog_config
         )
+
+        # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed
+        if catalog_config.get("type") == "glue":
+            role_to_assume = get_first_property_value(
+                catalog_config, GLUE_ROLE_ARN, AWS_ROLE_ARN
+            )
+            if role_to_assume:
+                logger.debug(
+                    "Recognized role ARN in glue catalog config, attempting to workaround pyiceberg limitation in role assumption for the glue client"
+                )
+                session = boto3.Session(
+                    profile_name=catalog_config.get(GLUE_PROFILE_NAME),
+                    region_name=get_first_property_value(
+                        catalog_config, GLUE_REGION, AWS_REGION
+                    ),
+                    botocore_session=catalog_config.get(BOTOCORE_SESSION),
+                    aws_access_key_id=get_first_property_value(
+                        catalog_config, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID
+                    ),
+                    aws_secret_access_key=get_first_property_value(
+                        catalog_config, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY
+                    ),
+                    aws_session_token=get_first_property_value(
+                        catalog_config, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN
+                    ),
+                )
+
+                sts_client = session.client("sts")
+                identity = sts_client.get_caller_identity()
+                logger.debug(
+                    f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}"
+                )
+
+                # below might fail if such duration is not allowed per policies
+                try:
+                    response = sts_client.assume_role(
+                        RoleArn=role_to_assume,
+                        RoleSessionName="session",
+                        DurationSeconds=43200,
+                    )
+                except sts_client.exceptions.ClientError:
+                    # Fallback to default duration
+                    response = sts_client.assume_role(
+                        RoleArn=role_to_assume, RoleSessionName="session"
+                    )
+                logger.debug(f"Assumed role: {response['AssumedRoleUser']}")
+                creds = response["Credentials"]
+                catalog_config[GLUE_ACCESS_KEY_ID] = creds["AccessKeyId"]
+                catalog_config[GLUE_SECRET_ACCESS_KEY] = creds["SecretAccessKey"]
+                catalog_config[GLUE_SESSION_TOKEN] = creds["SessionToken"]
+                catalog_config[GLUE_REGION] = response["AssumedRoleUser"]["Arn"].split(
+                    ":"
+                )[4]
+
         catalog = load_catalog(name=catalog_name, **catalog_config)
         if isinstance(catalog, RestCatalog):
             logger.debug(

From efab896d1e64215158b718bdf258cbaf24628ebe Mon Sep 17 00:00:00 2001
From: pedro93 <pedro.cls93@gmail.com>
Date: Thu, 13 Nov 2025 12:45:47 +0000
Subject: [PATCH 2/9] Remove glue region auto-detection attempt

---
 .../src/datahub/ingestion/source/iceberg/iceberg_common.py     | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index 61bc2b0b6ef95..34d89b4f14570 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -236,9 +236,6 @@ def get_catalog(self) -> Catalog:
                 catalog_config[GLUE_ACCESS_KEY_ID] = creds["AccessKeyId"]
                 catalog_config[GLUE_SECRET_ACCESS_KEY] = creds["SecretAccessKey"]
                 catalog_config[GLUE_SESSION_TOKEN] = creds["SessionToken"]
-                catalog_config[GLUE_REGION] = response["AssumedRoleUser"]["Arn"].split(
-                    ":"
-                )[4]
 
         catalog = load_catalog(name=catalog_name, **catalog_config)
         if isinstance(catalog, RestCatalog):

From d5172c121d6ac3d05365c98a6be775a47f744596 Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Thu, 13 Nov 2025 16:50:04 +0100
Subject: [PATCH 3/9] Extract glue role handling into helper, skip assumption if role already active

---
 .../source/iceberg/iceberg_common.py          | 98 +++++++++++--------
 1 file changed, 59 insertions(+), 39 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index 34d89b4f14570..87a2cc29f50f4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -172,53 +172,54 @@ def is_profiling_enabled(self) -> bool:
             self.profiling.operation_config
         )
 
-    def get_catalog(self) -> Catalog:
-        """Returns the Iceberg catalog instance as configured by the `catalog` dictionary.
-
-        Returns:
-            Catalog: Iceberg catalog instance.
-        """
-        if not self.catalog:
-            raise ValueError("No catalog configuration found")
-
-        # Retrieve the dict associated with the one catalog entry
-        catalog_name, catalog_config = next(iter(self.catalog.items()))
-        logger.debug(
-            "Initializing the catalog %s with config: %s", catalog_name, catalog_config
+    def _custom_glue_catalog_handling(self, catalog_config: Dict[str, Any]) -> None:
+        role_to_assume = get_first_property_value(
+            catalog_config, GLUE_ROLE_ARN, AWS_ROLE_ARN
         )
+        if role_to_assume:
+            logger.debug(
+                "Recognized role ARN in glue catalog config, attempting to workaround pyiceberg limitation in role assumption for the glue client"
+            )
+            session = boto3.Session(
+                profile_name=catalog_config.get(GLUE_PROFILE_NAME),
+                region_name=get_first_property_value(
+                    catalog_config, GLUE_REGION, AWS_REGION
+                ),
+                botocore_session=catalog_config.get(BOTOCORE_SESSION),
+                aws_access_key_id=get_first_property_value(
+                    catalog_config, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID
+                ),
+                aws_secret_access_key=get_first_property_value(
+                    catalog_config, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY
+                ),
+                aws_session_token=get_first_property_value(
+                    catalog_config, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN
+                ),
+            )
 
-        # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed
-        if catalog_config.get("type") == "glue":
-            role_to_assume = get_first_property_value(
-                catalog_config, GLUE_ROLE_ARN, AWS_ROLE_ARN
+            sts_client = session.client("sts")
+            identity = sts_client.get_caller_identity()
+            logger.debug(
+                f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}"
             )
-            if role_to_assume:
-                logger.debug(
-                    "Recognized role ARN in glue catalog config, attempting to workaround pyiceberg limitation in role assumption for the glue client"
+            current_role_name = None
+            if ":assumed-role/" in identity["Arn"]:
+                current_role_name = (
+                    identity["Arn"].split(":assumed-role/")[1].split("/")[0]
                 )
-                session = boto3.Session(
-                    profile_name=catalog_config.get(GLUE_PROFILE_NAME),
-                    region_name=get_first_property_value(
-                        catalog_config, GLUE_REGION, AWS_REGION
-                    ),
-                    botocore_session=catalog_config.get(BOTOCORE_SESSION),
-                    aws_access_key_id=get_first_property_value(
-                        catalog_config, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID
-                    ),
-                    aws_secret_access_key=get_first_property_value(
-                        catalog_config, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY
-                    ),
-                    aws_session_token=get_first_property_value(
-                        catalog_config, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN
-                    ),
+
+            maybe_target_role_name = role_to_assume.split("/")
+            if len(maybe_target_role_name) < 2:
+                logger.warning(
+                    f"Expected target role to be proper ARN, it doesn't appear to be so: {role_to_assume}, continuing nonetheless"
                 )
+            target_role_name = maybe_target_role_name[-1]
 
-                sts_client = session.client("sts")
-                identity = sts_client.get_caller_identity()
+            if current_role_name == target_role_name:
                 logger.debug(
-                    f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}"
+                    "Current role and the role we wanted to assume are the same, continuing without further assumption steps"
                 )
-
+            else:
                 # below might fail if such duration is not allowed per policies
                 try:
                     response = sts_client.assume_role(
@@ -237,6 +238,25 @@ def get_catalog(self) -> Catalog:
                 catalog_config[GLUE_SECRET_ACCESS_KEY] = creds["SecretAccessKey"]
                 catalog_config[GLUE_SESSION_TOKEN] = creds["SessionToken"]
 
+    def get_catalog(self) -> Catalog:
+        """Returns the Iceberg catalog instance as configured by the `catalog` dictionary.
+
+        Returns:
+            Catalog: Iceberg catalog instance.
+        """
+        if not self.catalog:
+            raise ValueError("No catalog configuration found")
+
+        # Retrieve the dict associated with the one catalog entry
+        catalog_name, catalog_config = next(iter(self.catalog.items()))
+        logger.debug(
+            "Initializing the catalog %s with config: %s", catalog_name, catalog_config
+        )
+
+        # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed
+        if catalog_config.get("type") == "glue":
+            self._custom_glue_catalog_handling(catalog_config)
+
         catalog = load_catalog(name=catalog_name, **catalog_config)
         if isinstance(catalog, RestCatalog):
             logger.debug(

From 1c2d21c9af7c05678575313832047a0fa5da780f Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Thu, 13 Nov 2025 17:51:23 +0100
Subject: [PATCH 4/9] Log the role ARN before attempting to assume it

---
 .../src/datahub/ingestion/source/iceberg/iceberg_common.py       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index 87a2cc29f50f4..37a4a527b611f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -220,6 +220,7 @@ def _custom_glue_catalog_handling(self, catalog_config: Dict[str, Any]) -> None:
                     "Current role and the role we wanted to assume are the same, continuing without further assumption steps"
                 )
             else:
+                logger.debug(f"Assuming the role {role_to_assume}")
                 # below might fail if such duration is not allowed per policies
                 try:
                     response = sts_client.assume_role(

From 6a2e9c3a53a292bd344ecd3b86e40ea3e4f97430 Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Thu, 13 Nov 2025 20:33:20 +0100
Subject: [PATCH 5/9] Graceful handling of OSError as warnings

---
 .../src/datahub/ingestion/source/iceberg/iceberg.py        | 7 +++++++
 metadata-ingestion/tests/unit/test_iceberg.py              | 6 +++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py
index 3d9cdffb0a9c7..1a18bf5ef8206 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg.py
@@ -340,6 +340,13 @@ def _try_processing_dataset(
                     context=dataset_name,
                     exc=e,
                 )
+            except OSError as e:
+                self.report.warning(
+                    title="Can't read manifest",
+                    message="Provided manifest path appeared impossible to read",
+                    context=dataset_name,
+                    exc=e,
+                )
             except ValueError as e:
                 if "Could not initialize FileIO" not in str(e):
                     raise
diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py
index 69bd9d03cc397..5f4c4eba153f7 100644
--- a/metadata-ingestion/tests/unit/test_iceberg.py
+++ b/metadata-ingestion/tests/unit/test_iceberg.py
@@ -1359,6 +1359,9 @@ def _raise_server_error(_: Catalog) -> Never:
     def _raise_rest_error(_: Catalog) -> Never:
         raise RESTError()
 
+    def _raise_os_error(_: Catalog) -> Never:
+        raise OSError()
+
     def _raise_fileio_error(_: Catalog) -> Never:
         raise ValueError("Could not initialize FileIO: abc.dummy.fileio")
 
@@ -1424,6 +1427,7 @@ def _raise_fileio_error(_: Catalog) -> Never:
                 "table9": _raise_server_error,
                 "table10": _raise_fileio_error,
                 "table11": _raise_rest_error,
+                "table12": _raise_os_error,
             }
         }
     )
@@ -1451,7 +1455,7 @@ def _raise_fileio_error(_: Catalog) -> Never:
             expected_wu_urns,
         )
         assert (
-            source.report.warnings.total_elements == 6
+            source.report.warnings.total_elements == 7
         )  # ServerError and RESTError exceptions are caught together
         assert source.report.failures.total_elements == 0
         assert source.report.tables_scanned == 4

From ab68f1ae349b12c68b629bae1f2700acfa1753a2 Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Sun, 16 Nov 2025 22:18:32 +0100
Subject: [PATCH 6/9] Added comprehensive tests, refactored logic

---
 .../source/iceberg/iceberg_common.py          |  23 +-
 metadata-ingestion/tests/unit/test_iceberg.py | 426 ++++++++++++++++++
 2 files changed, 439 insertions(+), 10 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index 37a4a527b611f..c6fdc3356d7c2 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -202,20 +202,23 @@ def _custom_glue_catalog_handling(self, catalog_config: Dict[str, Any]) -> None:
             logger.debug(
                 f"Authenticated as {identity['Arn']}, attempting to assume a role: {role_to_assume}"
             )
-            current_role_name = None
-            if ":assumed-role/" in identity["Arn"]:
-                current_role_name = (
-                    identity["Arn"].split(":assumed-role/")[1].split("/")[0]
-                )
 
-            maybe_target_role_name = role_to_assume.split("/")
-            if len(maybe_target_role_name) < 2:
+            current_role_arn = None
+            try:
+                if ":assumed-role/" in identity["Arn"]:
+                    current_role_arn = (
+                        "/".join(identity["Arn"].split("/")[0:-1])
+                        .replace(":assumed-role/", ":role/")
+                        .replace("arn:aws:sts", "arn:aws:iam")
+                    )
+                    logger.debug(f"Deducted current role: {current_role_arn}")
+            except Exception as e:
                 logger.warning(
-                    f"Expected target role to be proper ARN, it doesn't appear to be so: {role_to_assume}, continuing nonetheless"
+                    "We couldn't convert currently assumed role to 'role' format so that we could compare "
+                    f"it with the target role, will try to assume the target role nonetheless, exception: {e}"
                 )
-            target_role_name = maybe_target_role_name[-1]
 
-            if current_role_name == target_role_name:
+            if current_role_arn == role_to_assume:
                 logger.debug(
                     "Current role and the role we wanted to assume are the same, continuing without further assumption steps"
                 )
diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py
index 5f4c4eba153f7..bec6b1555b75f 100644
--- a/metadata-ingestion/tests/unit/test_iceberg.py
+++ b/metadata-ingestion/tests/unit/test_iceberg.py
@@ -15,6 +15,7 @@
 from unittest.mock import patch
 
 import pytest
+from botocore.exceptions import ClientError
 from pydantic import ValidationError
 from pyiceberg.catalog import Catalog
 from pyiceberg.exceptions import (
@@ -1601,3 +1602,428 @@ def test_ingesting_namespace_properties() -> None:
             ].customProperties
             == custom_properties
         )
+
+
+def test_glue_catalog_no_role_assumption() -> None:
+    """Test that when no role ARN is provided, no role assumption occurs."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "s3.region": "us-west-2",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch("datahub.ingestion.source.iceberg.iceberg_common.boto3") as mock_boto3,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        config.get_catalog()
+
+        # Assuming a role requires a boto3 Session first; since none was created,
+        # we are guaranteed that no role assumption took place either
+        mock_boto3.Session.assert_not_called()
+
+
+def test_glue_catalog_role_assumption_same_role() -> None:
+    """Test that when current role matches target role, no assumption occurs."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole",
+            "s3.region": "us-west-2",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:sts::123456789012:assumed-role/MyRole/session-name",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        config.get_catalog()
+        mock_sts.get_caller_identity.assert_called_once()
+
+        # Should NOT call assume_role since we're already in the target role
+        mock_sts.assume_role.assert_not_called()
+
+
+def test_glue_catalog_role_assumption_same_role_name_different_account() -> None:
+    """Test that a role with the same name in a different account is still assumed."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole",
+            "s3.region": "us-west-2",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:sts::345678249436:assumed-role/MyRole/session",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        mock_sts.assume_role.return_value = {
+            "Credentials": {
+                "AccessKeyId": "ASIAIOSFODNN7EXAMPLE",
+                "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY",
+                "SessionToken": "FwoGZXIvYXdzEBYaDH...",
+                "Expiration": "2024-01-01T00:00:00Z",
+            },
+            "AssumedRoleUser": {
+                "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session",
+                "Arn": "arn:aws:sts::123456789012:assumed-role/MyRole/session",
+            },
+        }
+
+        config.get_catalog()
+        mock_sts.get_caller_identity.assert_called_once()
+
+        mock_sts.assume_role.assert_called_once_with(
+            RoleArn="arn:aws:iam::123456789012:role/MyRole",
+            RoleSessionName="session",
+            DurationSeconds=43200,
+        )
+
+        # Verify credentials were updated in catalog config
+        updated_config = config.catalog["test_glue"]
+        assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE"
+        assert (
+            updated_config["glue.secret-access-key"]
+            == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY"
+        )
+        assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
+
+
+def test_glue_catalog_role_assumption_different_role() -> None:
+    """Test successful role assumption when current role differs from target."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+            "s3.region": "us-west-2",
+            "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE",
+            "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        mock_sts.assume_role.return_value = {
+            "Credentials": {
+                "AccessKeyId": "ASIAIOSFODNN7EXAMPLE",
+                "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY",
+                "SessionToken": "FwoGZXIvYXdzEBYaDH...",
+                "Expiration": "2024-01-01T00:00:00Z",
+            },
+            "AssumedRoleUser": {
+                "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session",
+                "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session",
+            },
+        }
+
+        config.get_catalog()
+
+        mock_sts.assume_role.assert_called_once_with(
+            RoleArn="arn:aws:iam::123456789012:role/TargetRole",
+            RoleSessionName="session",
+            DurationSeconds=43200,
+        )
+
+        updated_config = config.catalog["test_glue"]
+        assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE"
+        assert (
+            updated_config["glue.secret-access-key"]
+            == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY"
+        )
+        assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
+
+
+def test_glue_catalog_role_assumption_fallback_duration() -> None:
+    """Test role assumption falls back to default duration on ClientError."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+            "s3.region": "us-west-2",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        mock_sts.exceptions.ClientError = ClientError
+
+        # First call with long duration fails, second succeeds
+        mock_sts.assume_role.side_effect = [
+            ClientError(
+                {
+                    "Error": {
+                        "Code": "ValidationError",
+                        "Message": "DurationSeconds exceeds maximum",
+                    }
+                },
+                "AssumeRole",
+            ),
+            {
+                "Credentials": {
+                    "AccessKeyId": "ASIAIOSFODNN7EXAMPLE",
+                    "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY",
+                    "SessionToken": "FwoGZXIvYXdzEBYaDH...",
+                    "Expiration": "2024-01-01T00:00:00Z",
+                },
+                "AssumedRoleUser": {
+                    "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session",
+                    "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session",
+                },
+            },
+        ]
+
+        config.get_catalog()
+
+        # Should call assume_role twice: once with long duration, once without
+        assert mock_sts.assume_role.call_count == 2
+
+        # First call with long duration
+        assert mock_sts.assume_role.call_args_list[0] == (
+            (),
+            {
+                "RoleArn": "arn:aws:iam::123456789012:role/TargetRole",
+                "RoleSessionName": "session",
+                "DurationSeconds": 43200,
+            },
+        )
+
+        # Second call without duration (default)
+        assert mock_sts.assume_role.call_args_list[1] == (
+            (),
+            {
+                "RoleArn": "arn:aws:iam::123456789012:role/TargetRole",
+                "RoleSessionName": "session",
+            },
+        )
+
+
+def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None:
+    """Test that client.role-arn property is also recognized for role assumption."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "client.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+            "client.region": "us-west-2",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        mock_sts.assume_role.return_value = {
+            "Credentials": {
+                "AccessKeyId": "ASIAIOSFODNN7EXAMPLE",
+                "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY",
+                "SessionToken": "FwoGZXIvYXdzEBYaDH...",
+                "Expiration": "2024-01-01T00:00:00Z",
+            },
+            "AssumedRoleUser": {
+                "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session",
+                "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session",
+            },
+        }
+
+        config.get_catalog()
+
+        # Should recognize client.role-arn and perform role assumption
+        mock_sts.assume_role.assert_called_once()
+
+        updated_config = config.catalog["test_glue"]
+        assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE"
+        assert (
+            updated_config["glue.secret-access-key"]
+            == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY"
+        )
+        assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
+
+
+def test_glue_catalog_role_assumption_non_assumed_role_identity() -> None:
+    """Test role assumption when current identity is not an assumed role (e.g., IAM user)."""
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+            "s3.region": "us-west-2",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        # Current identity is an IAM user, not an assumed role
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:iam::123456789012:user/my-user",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        mock_sts.assume_role.return_value = {
+            "Credentials": {
+                "AccessKeyId": "ASIAIOSFODNN7EXAMPLE",
+                "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY",
+                "SessionToken": "FwoGZXIvYXdzEBYaDH...",
+                "Expiration": "2024-01-01T00:00:00Z",
+            },
+            "AssumedRoleUser": {
+                "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session",
+                "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session",
+            },
+        }
+
+        config.get_catalog()
+
+        mock_sts.assume_role.assert_called_once()
+
+        updated_config = config.catalog["test_glue"]
+        assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE"
+        assert (
+            updated_config["glue.secret-access-key"]
+            == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY"
+        )
+        assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
+
+
+def test_glue_catalog_with_all_credential_parameters() -> None:
+    """Test that all credential parameters are passed correctly to boto3 Session."""
+    role_to_assume = "arn:aws:iam::123456789012:role/TargetRole"
+
+    catalog_config = {
+        "test_glue": {
+            "type": "glue",
+            "glue.role-arn": role_to_assume,
+            "glue.region": "us-west-2",
+            "glue.profile-name": "my-profile",
+            "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE",
+            "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            "glue.session-token": "FwoGZXIvYXdzEB...",
+        }
+    }
+    config = IcebergSourceConfig(catalog=catalog_config)
+
+    with (
+        patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
+        ) as mock_boto3_session,
+        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    ):
+        mock_session_instance = mock_boto3_session.return_value
+        mock_sts = mock_session_instance.client.return_value
+
+        mock_sts.get_caller_identity.return_value = {
+            "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
+            "UserId": "AIDACKCEVSQ6C2EXAMPLE",
+            "Account": "123456789012",
+        }
+
+        mock_sts.assume_role.return_value = {
+            "Credentials": {
+                "AccessKeyId": "ASIAIOSFODNN7EXAMPLE2",
+                "SecretAccessKey": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY2",
+                "SessionToken": "FwoGZXIvYXdzEBYaDH2...",
+                "Expiration": "2024-01-01T00:00:00Z",
+            },
+            "AssumedRoleUser": {
+                "AssumedRoleId": "AROA3XFRBF535PLBIFPI4:session",
+                "Arn": "arn:aws:sts::123456789012:assumed-role/TargetRole/session",
+            },
+        }
+
+        config.get_catalog()
+
+        mock_boto3_session.assert_called_once_with(
+            profile_name="my-profile",
+            region_name="us-west-2",
+            botocore_session=None,
+            aws_access_key_id="AKIAIOSFODNN7EXAMPLE",
+            aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            aws_session_token="FwoGZXIvYXdzEB...",
+        )
+
+        mock_sts.assume_role.assert_called_once_with(
+            RoleArn=role_to_assume,
+            RoleSessionName="session",
+            DurationSeconds=43200,
+        )
+
+        updated_config = config.catalog["test_glue"]
+        assert updated_config["glue.access-key-id"] == "ASIAIOSFODNN7EXAMPLE2"
+        assert (
+            updated_config["glue.secret-access-key"]
+            == "wJalrXUtnFEMI/K7MDENG/bPxRfiCYZEXAMPLEKEY2"
+        )
+        assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH2..."

From f3479bc96fe6da13068f87c0519d4fe8fa127ffc Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Sun, 16 Nov 2025 23:50:43 +0100
Subject: [PATCH 7/9] Polishing tests

---
 metadata-ingestion/tests/unit/test_iceberg.py | 291 ++++++++----------
 1 file changed, 132 insertions(+), 159 deletions(-)

diff --git a/metadata-ingestion/tests/unit/test_iceberg.py b/metadata-ingestion/tests/unit/test_iceberg.py
index bec6b1555b75f..624a58dde8d42 100644
--- a/metadata-ingestion/tests/unit/test_iceberg.py
+++ b/metadata-ingestion/tests/unit/test_iceberg.py
@@ -1604,46 +1604,66 @@ def test_ingesting_namespace_properties() -> None:
         )
 
 
-def test_glue_catalog_no_role_assumption() -> None:
-    """Test that when no role ARN is provided, no role assumption occurs."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "s3.region": "us-west-2",
-        }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch("datahub.ingestion.source.iceberg.iceberg_common.boto3") as mock_boto3,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
-    ):
-        config.get_catalog()
-
-        # To assume role we first need a boto3 Session object, since we are not getting it, there is guarantee
-        # we are not assuming role neither
-        mock_boto3.Session.assert_not_called()
+class TestGlueCatalogRoleAssumption:
+    """
+    Tests for the workaround of a PyIceberg library bug that causes it not to assume the indicated IAM role
+    when connecting to a Glue catalog.
+    """
 
+    @pytest.fixture(autouse=True)
+    def mock_load_catalog(self):
+        """
+        The get_catalog function, which we are testing in this class, calls load_catalog, which would in turn
+        call boto3.Session. That would bloat our tests, so we mock load_catalog for all of them.
+        """
+        with patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"):
+            yield
 
-def test_glue_catalog_role_assumption_same_role() -> None:
-    """Test that when current role matches target role, no assumption occurs."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole",
-            "s3.region": "us-west-2",
-        }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
+    @pytest.fixture
+    def mock_boto3_session(self):
+        """Fixture to mock boto3.Session and return configured mocks.
 
-    with (
-        patch(
+        Returns:
+            tuple: (mock_boto3_session, mock_sts_client) for use in tests
+        """
+        with patch(
             "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
-    ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        ) as mock_boto3_session:
+            mock_session_instance = mock_boto3_session.return_value
+            mock_sts = mock_session_instance.client.return_value
+            yield mock_boto3_session, mock_sts
+
+    def test_no_role_assumption(self):
+        """Test that when no role ARN is provided, no role assumption occurs."""
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "s3.region": "us-west-2",
+            }
+        }
+        config = IcebergSourceConfig(catalog=catalog_config)
+
+        with patch(
+            "datahub.ingestion.source.iceberg.iceberg_common.boto3"
+        ) as mock_boto3:
+            config.get_catalog()
+
+            # Assuming a role first requires a boto3 Session object; since none is created, it is guaranteed
+            # that no role is assumed either.
+            mock_boto3.Session.assert_not_called()
+
+    def test_same_role_no_assumption(self, mock_boto3_session):
+        """Test that when current role matches target role, no assumption occurs."""
+        mock_session, mock_sts = mock_boto3_session
+
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole",
+                "s3.region": "us-west-2",
+            }
+        }
+        config = IcebergSourceConfig(catalog=catalog_config)
 
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:sts::123456789012:assumed-role/MyRole/session-name",
@@ -1657,26 +1677,18 @@ def test_glue_catalog_role_assumption_same_role() -> None:
         # Should NOT call assume_role since we're already in the target role
         mock_sts.assume_role.assert_not_called()
 
+    def test_same_role_name_different_account(self, mock_boto3_session):
+        """Test that when current role name matches but account differs, assumption occurs."""
+        mock_session, mock_sts = mock_boto3_session
 
-def test_glue_catalog_role_assumption_same_role_name_different_account() -> None:
-    """Test that when current role matches target role, no assumption occurs."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole",
-            "s3.region": "us-west-2",
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "glue.role-arn": "arn:aws:iam::123456789012:role/MyRole",
+                "s3.region": "us-west-2",
+            }
         }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch(
-            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
-    ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        config = IcebergSourceConfig(catalog=catalog_config)
 
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:sts::345678249436:assumed-role/MyRole/session",
@@ -1715,28 +1727,20 @@ def test_glue_catalog_role_assumption_same_role_name_different_account() -> None
         )
         assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
 
-
-def test_glue_catalog_role_assumption_different_role() -> None:
-    """Test successful role assumption when current role differs from target."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
-            "s3.region": "us-west-2",
-            "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE",
-            "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+    def test_different_role_assumption(self, mock_boto3_session):
+        """Test successful role assumption when current role differs from target."""
+        mock_session, mock_sts = mock_boto3_session
+
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+                "s3.region": "us-west-2",
+                "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE",
+                "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+            }
         }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch(
-            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
-    ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        config = IcebergSourceConfig(catalog=catalog_config)
 
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
@@ -1773,26 +1777,18 @@ def test_glue_catalog_role_assumption_different_role() -> None:
         )
         assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
 
+    def test_fallback_duration(self, mock_boto3_session):
+        """Test role assumption falls back to default duration on ClientError."""
+        mock_session, mock_sts = mock_boto3_session
 
-def test_glue_catalog_role_assumption_fallback_duration() -> None:
-    """Test role assumption falls back to default duration on ClientError."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
-            "s3.region": "us-west-2",
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+                "s3.region": "us-west-2",
+            }
         }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch(
-            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
-    ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        config = IcebergSourceConfig(catalog=catalog_config)
 
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
@@ -1851,26 +1847,20 @@ def test_glue_catalog_role_assumption_fallback_duration() -> None:
             },
         )
 
-
-def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None:
-    """Test that client.role-arn property is also recognized for role assumption."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "client.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
-            "client.region": "us-west-2",
-        }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch(
-            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    def test_glue_catalog_role_assumption_with_aws_role_arn_property(
+        self, mock_boto3_session
     ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        """Test that client.role-arn property is also recognized for role assumption."""
+        mock_session, mock_sts = mock_boto3_session
+
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "client.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+                "client.region": "us-west-2",
+            }
+        }
+        config = IcebergSourceConfig(catalog=catalog_config)
 
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
@@ -1893,7 +1883,6 @@ def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None:
 
         config.get_catalog()
 
-        # Should recognize client.role-arn and perform role assumption
         mock_sts.assume_role.assert_called_once()
 
         updated_config = config.catalog["test_glue"]
@@ -1904,28 +1893,21 @@ def test_glue_catalog_role_assumption_with_aws_role_arn_property() -> None:
         )
         assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
 
-
-def test_glue_catalog_role_assumption_non_assumed_role_identity() -> None:
-    """Test role assumption when current identity is not an assumed role (e.g., IAM user)."""
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
-            "s3.region": "us-west-2",
-        }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch(
-            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
+    def test_glue_catalog_role_assumption_non_assumed_role_identity(
+        self, mock_boto3_session
     ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        """Test role assumption when current identity is not an assumed role (e.g., IAM user)."""
+        mock_session, mock_sts = mock_boto3_session
+
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "glue.role-arn": "arn:aws:iam::123456789012:role/TargetRole",
+                "s3.region": "us-west-2",
+            }
+        }
+        config = IcebergSourceConfig(catalog=catalog_config)
 
-        # Current identity is an IAM user, not an assumed role
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:iam::123456789012:user/my-user",
             "UserId": "AIDACKCEVSQ6C2EXAMPLE",
@@ -1957,32 +1939,23 @@ def test_glue_catalog_role_assumption_non_assumed_role_identity() -> None:
         )
         assert updated_config["glue.session-token"] == "FwoGZXIvYXdzEBYaDH..."
 
-
-def test_glue_catalog_with_all_credential_parameters() -> None:
-    """Test that all credential parameters are passed correctly to boto3 Session."""
-    role_to_assume = "arn:aws:iam::123456789012:role/TargetRole"
-
-    catalog_config = {
-        "test_glue": {
-            "type": "glue",
-            "glue.role-arn": role_to_assume,
-            "glue.region": "us-west-2",
-            "glue.profile-name": "my-profile",
-            "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE",
-            "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
-            "glue.session-token": "FwoGZXIvYXdzEB...",
+    def test_glue_catalog_with_all_credential_parameters(self, mock_boto3_session):
+        """Test that all credential parameters are passed correctly to boto3 Session."""
+        mock_session, mock_sts = mock_boto3_session
+        role_to_assume = "arn:aws:iam::123456789012:role/TargetRole"
+
+        catalog_config = {
+            "test_glue": {
+                "type": "glue",
+                "glue.role-arn": role_to_assume,
+                "glue.region": "us-west-2",
+                "glue.profile-name": "my-profile",
+                "glue.access-key-id": "AKIAIOSFODNN7EXAMPLE",
+                "glue.secret-access-key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+                "glue.session-token": "FwoGZXIvYXdzEB...",
+            }
         }
-    }
-    config = IcebergSourceConfig(catalog=catalog_config)
-
-    with (
-        patch(
-            "datahub.ingestion.source.iceberg.iceberg_common.boto3.Session"
-        ) as mock_boto3_session,
-        patch("datahub.ingestion.source.iceberg.iceberg_common.load_catalog"),
-    ):
-        mock_session_instance = mock_boto3_session.return_value
-        mock_sts = mock_session_instance.client.return_value
+        config = IcebergSourceConfig(catalog=catalog_config)
 
         mock_sts.get_caller_identity.return_value = {
             "Arn": "arn:aws:sts::123456789012:assumed-role/CurrentRole/session",
@@ -2005,7 +1978,7 @@ def test_glue_catalog_with_all_credential_parameters() -> None:
 
         config.get_catalog()
 
-        mock_boto3_session.assert_called_once_with(
+        mock_session.assert_called_once_with(
             profile_name="my-profile",
             region_name="us-west-2",
             botocore_session=None,

From 2a9f1c873d335565526482de2c3120de2a34d121 Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Mon, 17 Nov 2025 00:11:23 +0100
Subject: [PATCH 8/9] Reduced log verbosity

---
 .../src/datahub/ingestion/source/iceberg/iceberg_common.py    | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index c6fdc3356d7c2..40c0947d00c97 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -253,9 +253,7 @@ def get_catalog(self) -> Catalog:
 
         # Retrieve the dict associated with the one catalog entry
         catalog_name, catalog_config = next(iter(self.catalog.items()))
-        logger.debug(
-            "Initializing the catalog %s with config: %s", catalog_name, catalog_config
-        )
+        logger.debug("Initializing the catalog %s", catalog_name)
 
         # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed
         if catalog_config.get("type") == "glue":

From c5a60ef85475930d1f67e44317611393625d5266 Mon Sep 17 00:00:00 2001
From: Piotr Skrydalewicz <piotr.skrydalewicz@acryl.io>
Date: Mon, 17 Nov 2025 00:19:10 +0100
Subject: [PATCH 9/9] Added link to the issue in pyiceberg github

---
 .../src/datahub/ingestion/source/iceberg/iceberg_common.py     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
index 40c0947d00c97..dfd198c87a714 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/iceberg/iceberg_common.py
@@ -255,7 +255,8 @@ def get_catalog(self) -> Catalog:
         catalog_name, catalog_config = next(iter(self.catalog.items()))
         logger.debug("Initializing the catalog %s", catalog_name)
 
-        # workaround pyiceberg 0.10.0 issue with ignoring role assumption for glue catalog, remove this code once pyiceberg is fixed
+        # Workaround for a pyiceberg 0.10.0 issue where role assumption is ignored for the glue catalog.
+        # Remove this code once pyiceberg is fixed; tracked in https://github.com/apache/iceberg-python/issues/2747
         if catalog_config.get("type") == "glue":
             self._custom_glue_catalog_handling(catalog_config)