Skip to content

Commit 776701c

Browse files
chore: Split values.rs into separate files (#1840)
## Which issue does this PR close? - Closes #1807. ## What changes are included in this PR? Split up values.rs into component files in a values folder. Happy to discuss organization. Here are the details to account for line differences: 1. Copyright headers duplicated - Each of the new files got its own Apache license header 2. Module structure added - Created mod.rs with module declarations and re-exports 3. Module-level docs added - Changed from /*! Value in iceberg */ to individual //! style doc comments for each module 4. Visibility changes - Changed 5 constants from `private const` to `pub(crate) const`. This was necessary so these constants could be accessed across the split files: - `MAX_TIME_VALUE` - `INT_MAX` - `INT_MIN` - `LONG_MAX` - `LONG_MIN` ## Are these changes tested? Existing tests. --------- Co-authored-by: Renjie Liu <liurenjie2008@gmail.com>
1 parent cb88260 commit 776701c

File tree

10 files changed

+4450
-4287
lines changed

10 files changed

+4450
-4287
lines changed

crates/iceberg/src/spec/values.rs

Lines changed: 0 additions & 4287 deletions
This file was deleted.

crates/iceberg/src/spec/values/datum.rs

Lines changed: 1225 additions & 0 deletions
Large diffs are not rendered by default.

crates/iceberg/src/spec/values/literal.rs

Lines changed: 747 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Map collection type for Iceberg
19+
20+
use std::collections::HashMap;
21+
use std::hash::Hash;
22+
23+
use super::Literal;
24+
25+
/// Map is a collection of key-value pairs with a key type and a value type.
26+
/// It used in Literal::Map, to make it hashable, the order of key-value pairs is stored in a separate vector
27+
/// so that we can hash the map in a deterministic way. But it also means that the order of key-value pairs is matter
28+
/// for the hash value.
29+
///
30+
/// When converting to Arrow (e.g., for Iceberg), the map should be represented using
31+
/// the default field name "key_value".
32+
///
33+
/// Example:
34+
///
35+
/// ```text
36+
/// let key_value_field = Field::new(
37+
/// DEFAULT_MAP_FIELD_NAME,
38+
/// arrow_schema::DataType::Struct(vec![
39+
/// Arc::new(key_field.clone()),
40+
/// Arc::new(value_field.clone()),
41+
/// ].into()),
42+
/// false
43+
/// );
44+
/// '''
45+
#[derive(Clone, Debug, PartialEq, Eq)]
46+
pub struct Map {
47+
index: HashMap<Literal, usize>,
48+
pair: Vec<(Literal, Option<Literal>)>,
49+
}
50+
51+
impl Map {
52+
/// Creates a new empty map.
53+
pub fn new() -> Self {
54+
Self {
55+
index: HashMap::new(),
56+
pair: Vec::new(),
57+
}
58+
}
59+
60+
/// Return the number of key-value pairs in the map.
61+
pub fn len(&self) -> usize {
62+
self.pair.len()
63+
}
64+
65+
/// Returns true if the map contains no elements.
66+
pub fn is_empty(&self) -> bool {
67+
self.pair.is_empty()
68+
}
69+
70+
/// Inserts a key-value pair into the map.
71+
/// If the map did not have this key present, None is returned.
72+
/// If the map did have this key present, the value is updated, and the old value is returned.
73+
pub fn insert(&mut self, key: Literal, value: Option<Literal>) -> Option<Option<Literal>> {
74+
if let Some(index) = self.index.get(&key) {
75+
let old_value = std::mem::replace(&mut self.pair[*index].1, value);
76+
Some(old_value)
77+
} else {
78+
self.pair.push((key.clone(), value));
79+
self.index.insert(key, self.pair.len() - 1);
80+
None
81+
}
82+
}
83+
84+
/// Returns a reference to the value corresponding to the key.
85+
/// If the key is not present in the map, None is returned.
86+
pub fn get(&self, key: &Literal) -> Option<&Option<Literal>> {
87+
self.index.get(key).map(|index| &self.pair[*index].1)
88+
}
89+
90+
/// The order of map is matter, so this method used to compare two maps has same key-value pairs without considering the order.
91+
pub fn has_same_content(&self, other: &Map) -> bool {
92+
if self.len() != other.len() {
93+
return false;
94+
}
95+
96+
for (key, value) in &self.pair {
97+
match other.get(key) {
98+
Some(other_value) if value == other_value => (),
99+
_ => return false,
100+
}
101+
}
102+
103+
true
104+
}
105+
}
106+
107+
impl Default for Map {
108+
fn default() -> Self {
109+
Self::new()
110+
}
111+
}
112+
113+
impl Hash for Map {
114+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
115+
for (key, value) in &self.pair {
116+
key.hash(state);
117+
value.hash(state);
118+
}
119+
}
120+
}
121+
122+
impl FromIterator<(Literal, Option<Literal>)> for Map {
123+
fn from_iter<T: IntoIterator<Item = (Literal, Option<Literal>)>>(iter: T) -> Self {
124+
let mut map = Map::new();
125+
for (key, value) in iter {
126+
map.insert(key, value);
127+
}
128+
map
129+
}
130+
}
131+
132+
impl IntoIterator for Map {
133+
type Item = (Literal, Option<Literal>);
134+
type IntoIter = std::vec::IntoIter<Self::Item>;
135+
136+
fn into_iter(self) -> Self::IntoIter {
137+
self.pair.into_iter()
138+
}
139+
}
140+
141+
impl<const N: usize> From<[(Literal, Option<Literal>); N]> for Map {
142+
fn from(value: [(Literal, Option<Literal>); N]) -> Self {
143+
value.iter().cloned().collect()
144+
}
145+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! This module contains Iceberg value types
19+
20+
pub(crate) mod datum;
21+
mod literal;
22+
mod map;
23+
mod primitive;
24+
pub(crate) mod serde;
25+
mod struct_value;
26+
mod temporal;
27+
28+
#[cfg(test)]
29+
mod tests;
30+
31+
// Re-export all public types
32+
pub use datum::Datum;
33+
pub use literal::Literal;
34+
pub use map::Map;
35+
pub use primitive::PrimitiveLiteral;
36+
pub(crate) use serde::_serde::RawLiteral;
37+
pub use struct_value::Struct;
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Primitive literal types
19+
20+
use ordered_float::OrderedFloat;
21+
22+
/// Values present in iceberg type
23+
#[derive(Clone, Debug, PartialOrd, PartialEq, Hash, Eq)]
24+
pub enum PrimitiveLiteral {
25+
/// 0x00 for false, non-zero byte for true
26+
Boolean(bool),
27+
/// Stored as 4-byte little-endian
28+
Int(i32),
29+
/// Stored as 8-byte little-endian
30+
Long(i64),
31+
/// Stored as 4-byte little-endian
32+
Float(OrderedFloat<f32>),
33+
/// Stored as 8-byte little-endian
34+
Double(OrderedFloat<f64>),
35+
/// UTF-8 bytes (without length)
36+
String(String),
37+
/// Binary value (without length)
38+
Binary(Vec<u8>),
39+
/// Stored as 16-byte little-endian
40+
Int128(i128),
41+
/// Stored as 16-byte little-endian
42+
UInt128(u128),
43+
/// When a number is larger than it can hold
44+
AboveMax,
45+
/// When a number is smaller than it can hold
46+
BelowMin,
47+
}
48+
49+
impl PrimitiveLiteral {
50+
/// Returns true if the Literal represents a primitive type
51+
/// that can be a NaN, and that it's value is NaN
52+
pub fn is_nan(&self) -> bool {
53+
match self {
54+
PrimitiveLiteral::Double(val) => val.is_nan(),
55+
PrimitiveLiteral::Float(val) => val.is_nan(),
56+
_ => false,
57+
}
58+
}
59+
}

0 commit comments

Comments
 (0)