Skip to content

Commit 4f0d7ef

Browse files
authored
fix: add metadata_properties to _construct_parameters when update hive table (#2013)
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> Closes: #2010 # Rationale for this change This change adds metadata_properties to the _construct_parameters function to ensure metadata properties are included in the parameters. I'm not entirely confident about the changes, so please let me know if my understanding is correct—if so, I’ll proceed to add tests. Thanks you! # Are these changes tested? Not yet. # Are there any user-facing changes? Not sure.
1 parent 93931e4 commit 4f0d7ef

File tree

3 files changed

+45
-4
lines changed

3 files changed

+45
-4
lines changed

pyiceberg/catalog/hive.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -211,11 +211,18 @@ def _construct_hive_storage_descriptor(
211211
DEFAULT_PROPERTIES = {TableProperties.PARQUET_COMPRESSION: TableProperties.PARQUET_COMPRESSION_DEFAULT}
212212

213213

214-
def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]:
214+
def _construct_parameters(
215+
metadata_location: str, previous_metadata_location: Optional[str] = None, metadata_properties: Optional[Properties] = None
216+
) -> Dict[str, Any]:
215217
properties = {PROP_EXTERNAL: "TRUE", PROP_TABLE_TYPE: "ICEBERG", PROP_METADATA_LOCATION: metadata_location}
216218
if previous_metadata_location:
217219
properties[PROP_PREVIOUS_METADATA_LOCATION] = previous_metadata_location
218220

221+
if metadata_properties:
222+
for key, value in metadata_properties.items():
223+
if key not in properties:
224+
properties[key] = str(value)
225+
219226
return properties
220227

221228

@@ -360,7 +367,7 @@ def _convert_iceberg_into_hive(self, table: Table) -> HiveTable:
360367
property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
361368
),
362369
tableType=EXTERNAL_TABLE,
363-
parameters=_construct_parameters(table.metadata_location),
370+
parameters=_construct_parameters(metadata_location=table.metadata_location, metadata_properties=table.properties),
364371
)
365372

366373
def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None:
@@ -541,6 +548,7 @@ def commit_table(
541548
hive_table.parameters = _construct_parameters(
542549
metadata_location=updated_staged_table.metadata_location,
543550
previous_metadata_location=current_table.metadata_location,
551+
metadata_properties=updated_staged_table.properties,
544552
)
545553
open_client.alter_table_with_environment_context(
546554
dbname=database_name,

tests/catalog/test_hive.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,13 @@ def test_create_table(
342342
storedAsSubDirectories=None,
343343
),
344344
partitionKeys=None,
345-
parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location},
345+
parameters={
346+
"EXTERNAL": "TRUE",
347+
"table_type": "ICEBERG",
348+
"metadata_location": metadata_location,
349+
"write.parquet.compression-codec": "zstd",
350+
"owner": "javaberg",
351+
},
346352
viewOriginalText=None,
347353
viewExpandedText=None,
348354
tableType="EXTERNAL_TABLE",
@@ -517,7 +523,13 @@ def test_create_table_with_given_location_removes_trailing_slash(
517523
storedAsSubDirectories=None,
518524
),
519525
partitionKeys=None,
520-
parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location},
526+
parameters={
527+
"EXTERNAL": "TRUE",
528+
"table_type": "ICEBERG",
529+
"metadata_location": metadata_location,
530+
"write.parquet.compression-codec": "zstd",
531+
"owner": "javaberg",
532+
},
521533
viewOriginalText=None,
522534
viewExpandedText=None,
523535
tableType="EXTERNAL_TABLE",

tests/integration/test_reads.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,27 @@ def test_table_properties(catalog: Catalog) -> None:
112112
assert "None type is not a supported value in properties: property_name" in str(exc_info.value)
113113

114114

115+
@pytest.mark.integration
116+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
117+
def test_hive_properties(catalog: Catalog) -> None:
118+
table = create_table(catalog)
119+
table.transaction().set_properties({"abc": "def", "p1": "123"}).commit_transaction()
120+
121+
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])
122+
123+
with hive_client as open_client:
124+
hive_table = open_client.get_table(*TABLE_NAME)
125+
assert hive_table.parameters.get("abc") == "def"
126+
assert hive_table.parameters.get("p1") == "123"
127+
assert hive_table.parameters.get("not_exist_parameter") is None
128+
129+
table.transaction().remove_properties("abc").commit_transaction()
130+
131+
with hive_client as open_client:
132+
hive_table = open_client.get_table(*TABLE_NAME)
133+
assert hive_table.parameters.get("abc") is None
134+
135+
115136
@pytest.mark.integration
116137
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
117138
def test_table_properties_dict(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)