Skip to content

Commit 36bd7eb

Browse files
authored
feat(spec): add table_properties.rs to spec (#1733)
## Which issue does this PR close?

- Closes #1505.

## What changes are included in this PR?

- Adds `table_properties.rs` to hold and validate table properties and to set their default values. Uses macros to simplify adding new properties.

## Are these changes tested?

Yes.
1 parent ec545f2 commit 36bd7eb

File tree

10 files changed

+327
-173
lines changed

10 files changed

+327
-173
lines changed

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,10 @@ futures = "0.3"
7676
hive_metastore = "0.2.0"
7777
http = "1.2"
7878
iceberg = { version = "0.7.0", path = "./crates/iceberg" }
79-
iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" }
8079
iceberg-catalog-glue = { version = "0.7.0", path = "./crates/catalog/glue" }
81-
iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" }
8280
iceberg-catalog-hms = { version = "0.7.0", path = "./crates/catalog/hms" }
81+
iceberg-catalog-rest = { version = "0.7.0", path = "./crates/catalog/rest" }
82+
iceberg-catalog-s3tables = { version = "0.7.0", path = "./crates/catalog/s3tables" }
8383
iceberg-datafusion = { version = "0.7.0", path = "./crates/integrations/datafusion" }
8484
indicatif = "0.17"
8585
itertools = "0.13"

crates/iceberg/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ opendal = { workspace = true }
7474
ordered-float = { workspace = true }
7575
parquet = { workspace = true, features = ["async"] }
7676
rand = { workspace = true }
77-
reqwest = { workspace = true }
7877
reqsign = { version = "0.16.3", optional = true, default-features = false }
78+
reqwest = { workspace = true }
7979
roaring = { workspace = true }
8080
rust_decimal = { workspace = true }
8181
serde = { workspace = true }

crates/iceberg/src/spec/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ mod sort;
3030
mod statistic_file;
3131
mod table_metadata;
3232
mod table_metadata_builder;
33+
mod table_properties;
3334
mod transform;
3435
mod values;
3536
mod view_metadata;
@@ -48,6 +49,7 @@ pub use snapshot_summary::*;
4849
pub use sort::*;
4950
pub use statistic_file::*;
5051
pub use table_metadata::*;
52+
pub use table_properties::*;
5153
pub use transform::*;
5254
pub use values::*;
5355
pub use view_metadata::*;

crates/iceberg/src/spec/table_metadata.rs

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -46,91 +46,6 @@ pub(crate) static ONE_MINUTE_MS: i64 = 60_000;
4646
pub(crate) static EMPTY_SNAPSHOT_ID: i64 = -1;
4747
pub(crate) static INITIAL_SEQUENCE_NUMBER: i64 = 0;
4848

49-
/// Reserved table property for table format version.
50-
///
51-
/// Iceberg will default a new table's format version to the latest stable and recommended
52-
/// version. This reserved property keyword allows users to override the Iceberg format version of
53-
/// the table metadata.
54-
///
55-
/// If this table property exists when creating a table, the table will use the specified format
56-
/// version. If a table updates this property, it will try to upgrade to the specified format
57-
/// version.
58-
pub const PROPERTY_FORMAT_VERSION: &str = "format-version";
59-
/// Reserved table property for table UUID.
60-
pub const PROPERTY_UUID: &str = "uuid";
61-
/// Reserved table property for the total number of snapshots.
62-
pub const PROPERTY_SNAPSHOT_COUNT: &str = "snapshot-count";
63-
/// Reserved table property for current snapshot summary.
64-
pub const PROPERTY_CURRENT_SNAPSHOT_SUMMARY: &str = "current-snapshot-summary";
65-
/// Reserved table property for current snapshot id.
66-
pub const PROPERTY_CURRENT_SNAPSHOT_ID: &str = "current-snapshot-id";
67-
/// Reserved table property for current snapshot timestamp.
68-
pub const PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP: &str = "current-snapshot-timestamp-ms";
69-
/// Reserved table property for the JSON representation of current schema.
70-
pub const PROPERTY_CURRENT_SCHEMA: &str = "current-schema";
71-
/// Reserved table property for the JSON representation of the current (default) partition spec.
72-
pub const PROPERTY_DEFAULT_PARTITION_SPEC: &str = "default-partition-spec";
73-
/// Reserved table property for the JSON representation of the current (default) sort order.
74-
pub const PROPERTY_DEFAULT_SORT_ORDER: &str = "default-sort-order";
75-
76-
/// Property key for max number of previous versions to keep.
77-
pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX: &str = "write.metadata.previous-versions-max";
78-
/// Default value for max number of previous versions to keep.
79-
pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT: usize = 100;
80-
81-
/// Property key for max number of partitions to keep summary stats for.
82-
pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT: &str = "write.summary.partition-limit";
83-
/// Default value for the max number of partitions to keep summary stats for.
84-
pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT: u64 = 0;
85-
86-
/// Reserved Iceberg table properties list.
87-
///
88-
/// Reserved table properties are only used to control behaviors when creating or updating a
89-
/// table. The values of these properties are not persisted as part of the table metadata.
90-
pub const RESERVED_PROPERTIES: [&str; 9] = [
91-
PROPERTY_FORMAT_VERSION,
92-
PROPERTY_UUID,
93-
PROPERTY_SNAPSHOT_COUNT,
94-
PROPERTY_CURRENT_SNAPSHOT_ID,
95-
PROPERTY_CURRENT_SNAPSHOT_SUMMARY,
96-
PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP,
97-
PROPERTY_CURRENT_SCHEMA,
98-
PROPERTY_DEFAULT_PARTITION_SPEC,
99-
PROPERTY_DEFAULT_SORT_ORDER,
100-
];
101-
102-
/// Property key for number of commit retries.
103-
pub const PROPERTY_COMMIT_NUM_RETRIES: &str = "commit.retry.num-retries";
104-
/// Default value for number of commit retries.
105-
pub const PROPERTY_COMMIT_NUM_RETRIES_DEFAULT: usize = 4;
106-
107-
/// Property key for minimum wait time (ms) between retries.
108-
pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS: &str = "commit.retry.min-wait-ms";
109-
/// Default value for minimum wait time (ms) between retries.
110-
pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT: u64 = 100;
111-
112-
/// Property key for maximum wait time (ms) between retries.
113-
pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS: &str = "commit.retry.max-wait-ms";
114-
/// Default value for maximum wait time (ms) between retries.
115-
pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT: u64 = 60 * 1000; // 1 minute
116-
117-
/// Property key for total maximum retry time (ms).
118-
pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS: &str = "commit.retry.total-timeout-ms";
119-
/// Default value for total maximum retry time (ms).
120-
pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT: u64 = 30 * 60 * 1000; // 30 minutes
121-
122-
/// Default file format for data files
123-
pub const PROPERTY_DEFAULT_FILE_FORMAT: &str = "write.format.default";
124-
/// Default file format for delete files
125-
pub const PROPERTY_DELETE_DEFAULT_FILE_FORMAT: &str = "write.delete.format.default";
126-
/// Default value for data file format
127-
pub const PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT: &str = "parquet";
128-
129-
/// Target file size for newly written files.
130-
pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes";
131-
/// Default target file size
132-
pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB
133-
13449
/// Reference to [`TableMetadata`].
13550
pub type TableMetadataRef = Arc<TableMetadata>;
13651

crates/iceberg/src/spec/table_metadata_builder.rs

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@ use uuid::Uuid;
2222

2323
use super::{
2424
DEFAULT_PARTITION_SPEC_ID, DEFAULT_SCHEMA_ID, FormatVersion, MAIN_BRANCH, MetadataLog,
25-
ONE_MINUTE_MS, PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX,
26-
PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT, PartitionSpec, PartitionSpecBuilder,
27-
PartitionStatisticsFile, RESERVED_PROPERTIES, Schema, SchemaRef, Snapshot, SnapshotLog,
28-
SnapshotReference, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType,
29-
TableMetadata, UNPARTITIONED_LAST_ASSIGNED_ID, UnboundPartitionSpec,
25+
ONE_MINUTE_MS, PartitionSpec, PartitionSpecBuilder, PartitionStatisticsFile, Schema, SchemaRef,
26+
Snapshot, SnapshotLog, SnapshotReference, SnapshotRetention, SortOrder, SortOrderRef,
27+
StatisticsFile, StructType, TableMetadata, TableProperties, UNPARTITIONED_LAST_ASSIGNED_ID,
28+
UnboundPartitionSpec,
3029
};
3130
use crate::error::{Error, ErrorKind, Result};
3231
use crate::{TableCreation, TableUpdate};
@@ -247,7 +246,7 @@ impl TableMetadataBuilder {
247246
// List of specified properties that are RESERVED and should not be persisted.
248247
let reserved_properties = properties
249248
.keys()
250-
.filter(|key| RESERVED_PROPERTIES.contains(&key.as_str()))
249+
.filter(|key| TableProperties::RESERVED_PROPERTIES.contains(&key.as_str()))
251250
.map(ToString::to_string)
252251
.collect::<Vec<_>>();
253252

@@ -285,7 +284,7 @@ impl TableMetadataBuilder {
285284
// disallow removal of reserved properties
286285
let reserved_properties = properties
287286
.iter()
288-
.filter(|key| RESERVED_PROPERTIES.contains(&key.as_str()))
287+
.filter(|key| TableProperties::RESERVED_PROPERTIES.contains(&key.as_str()))
289288
.map(ToString::to_string)
290289
.collect::<Vec<_>>();
291290

@@ -1061,9 +1060,9 @@ impl TableMetadataBuilder {
10611060
let max_size = self
10621061
.metadata
10631062
.properties
1064-
.get(PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX)
1063+
.get(TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX)
10651064
.and_then(|v| v.parse::<usize>().ok())
1066-
.unwrap_or(PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)
1065+
.unwrap_or(TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)
10671066
.max(1);
10681067

10691068
if self.metadata.metadata_log.len() > max_size {
@@ -1360,8 +1359,8 @@ mod tests {
13601359
use crate::io::FileIOBuilder;
13611360
use crate::spec::{
13621361
BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PrimitiveType, Schema,
1363-
SnapshotRetention, SortDirection, SortField, StructType, Summary, Transform, Type,
1364-
UnboundPartitionField,
1362+
SnapshotRetention, SortDirection, SortField, StructType, Summary, TableProperties,
1363+
Transform, Type, UnboundPartitionField,
13651364
};
13661365
use crate::table::Table;
13671366

@@ -2299,7 +2298,7 @@ mod tests {
22992298
let builder = builder_without_changes(FormatVersion::V2);
23002299
let metadata = builder
23012300
.set_properties(HashMap::from_iter(vec![(
2302-
PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX.to_string(),
2301+
TableProperties::PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX.to_string(),
23032302
"2".to_string(),
23042303
)]))
23052304
.unwrap()

0 commit comments

Comments
 (0)