|
23 | 23 | import pytest |
24 | 24 |
|
25 | 25 | from pyiceberg.table import CommitTableResponse, Table |
| 26 | +from pyiceberg.table.update import RemoveSnapshotsUpdate, update_table_metadata |
26 | 27 | from pyiceberg.table.update.snapshot import ExpireSnapshots |
27 | 28 |
|
28 | 29 |
|
@@ -280,3 +281,39 @@ def worker2() -> None: |
280 | 281 |
|
281 | 282 | assert results["expire1_snapshots"] == expected_1, "Worker 1 snapshots contaminated" |
282 | 283 | assert results["expire2_snapshots"] == expected_2, "Worker 2 snapshots contaminated" |
| 284 | + |
| 285 | + |
| 286 | +def test_update_remove_snapshots_with_statistics(table_v2_with_statistics: Table) -> None: |
| 287 | + """ |
| 288 | + Verify that removing a snapshot also removes the statistics attached to it. |
| 289 | +
|
| 290 | + This test exercises the code path where RemoveStatisticsUpdate is instantiated |
| 291 | + within the RemoveSnapshotsUpdate handler. Before the fix for #2558, this would |
| 292 | + fail with: TypeError: BaseModel.__init__() takes 1 positional argument but 2 were given |
| 293 | + """ |
| 294 | + # The fixture table is expected to hold 2 snapshots: 3051729675574597004 and 3055729675574597004 |
| 295 | + # Each snapshot is expected to have one statistics entry (checked by the assertions in this test) |
| 296 | + REMOVE_SNAPSHOT = 3051729675574597004 |
| 297 | + KEEP_SNAPSHOT = 3055729675574597004 |
| 298 | + |
| 299 | + # Verify fixture assumptions up front so a fixture change fails here, not in the checks below |
| 300 | + assert len(table_v2_with_statistics.metadata.snapshots) == 2 |
| 301 | + assert len(table_v2_with_statistics.metadata.statistics) == 2 |
| 302 | + assert any(stat.snapshot_id == REMOVE_SNAPSHOT for stat in table_v2_with_statistics.metadata.statistics), ( |
| 303 | + "Snapshot to remove should have statistics" |
| 304 | + ) |
| 305 | + |
| 306 | + # Applying the update should trigger RemoveStatisticsUpdate instantiation for the removed snapshot |
| 307 | + update = RemoveSnapshotsUpdate(snapshot_ids=[REMOVE_SNAPSHOT]) |
| 308 | + new_metadata = update_table_metadata(table_v2_with_statistics.metadata, (update,)) |
| 309 | + |
| 310 | + # Verify the snapshot was removed and only the kept snapshot remains |
| 311 | + assert len(new_metadata.snapshots) == 1 |
| 312 | + assert new_metadata.snapshots[0].snapshot_id == KEEP_SNAPSHOT |
| 313 | + |
| 314 | + # Verify the statistics for the removed snapshot were also removed, leaving only the kept snapshot's |
| 315 | + assert len(new_metadata.statistics) == 1 |
| 316 | + assert new_metadata.statistics[0].snapshot_id == KEEP_SNAPSHOT |
| 317 | + assert not any(stat.snapshot_id == REMOVE_SNAPSHOT for stat in new_metadata.statistics), ( |
| 318 | + "Statistics for removed snapshot should be gone" |
| 319 | + ) |
0 commit comments