From d6364b7036d0f1812d4830d56f48d895cb2e05eb Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 3 Nov 2025 12:49:54 +0000 Subject: [PATCH 1/4] Add support for converting bytes Resolves #86 --- crates/iceberg/src/spec/values.rs | 33 +++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 4c763a3941..f41f036767 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -2682,12 +2682,33 @@ mod _serde { Type::Primitive(PrimitiveType::String) => Ok(Some(Literal::string(v))), _ => Err(invalid_err("string")), }, - // # TODO:https://github.com/apache/iceberg-rust/issues/86 - // rust avro don't support deserialize any bytes representation now. - RawLiteralEnum::Bytes(_) => Err(invalid_err_with_reason( - "bytes", - "todo: rust avro doesn't support deserialize any bytes representation now", - )), + RawLiteralEnum::Bytes(v) => match ty { + Type::Primitive(PrimitiveType::Binary) => Ok(Some(Literal::binary(v.to_vec()))), + Type::Primitive(PrimitiveType::Fixed(_)) => { + Ok(Some(Literal::fixed(v.to_vec()))) + } + Type::Primitive(PrimitiveType::Uuid) => { + if v.len() == 16 { + let bytes: [u8; 16] = v.as_slice().try_into().map_err(|_| { + invalid_err_with_reason("bytes", "UUID must be exactly 16 bytes") + })?; + Ok(Some(Literal::Primitive(PrimitiveLiteral::UInt128( + u128::from_be_bytes(bytes), + )))) + } else { + Err(invalid_err_with_reason( + "bytes", + "UUID must be exactly 16 bytes", + )) + } + } + Type::Primitive(PrimitiveType::Decimal { .. 
}) => { + Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128( + i128::from_be_bytes(v.as_slice().try_into().unwrap()), + )))) + } + _ => Err(invalid_err("bytes")), + }, RawLiteralEnum::List(v) => match ty { Type::List(ty) => Ok(Some(Literal::List( v.list From dd4abead5aa793bb37c4c1940cd601ea40053698 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 3 Nov 2025 12:57:38 +0000 Subject: [PATCH 2/4] Update code and add some tests --- crates/iceberg/src/spec/values.rs | 290 +++++++++++++++++++++++++++++- 1 file changed, 284 insertions(+), 6 deletions(-) diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index f41f036767..5b895552fb 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -2684,8 +2684,19 @@ mod _serde { }, RawLiteralEnum::Bytes(v) => match ty { Type::Primitive(PrimitiveType::Binary) => Ok(Some(Literal::binary(v.to_vec()))), - Type::Primitive(PrimitiveType::Fixed(_)) => { - Ok(Some(Literal::fixed(v.to_vec()))) + Type::Primitive(PrimitiveType::Fixed(expected_len)) => { + if v.len() == *expected_len as usize { + Ok(Some(Literal::fixed(v.to_vec()))) + } else { + Err(invalid_err_with_reason( + "bytes", + &format!( + "Fixed type must be exactly {} bytes, got {}", + expected_len, + v.len() + ), + )) + } } Type::Primitive(PrimitiveType::Uuid) => { if v.len() == 16 { @@ -2702,10 +2713,45 @@ mod _serde { )) } } - Type::Primitive(PrimitiveType::Decimal { .. }) => { - Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128( - i128::from_be_bytes(v.as_slice().try_into().unwrap()), - )))) + Type::Primitive(PrimitiveType::Decimal { precision, .. }) => { + let required_bytes = + Type::decimal_required_bytes(*precision).map_err(|e| { + invalid_err_with_reason( + "bytes", + &format!("Invalid decimal precision: {}", e), + ) + })? 
as usize; + + if v.len() == required_bytes { + // Pad the bytes to 16 bytes (i128 size) with sign extension + let mut padded_bytes = [0u8; 16]; + let start_idx = 16 - v.len(); + + // Copy the input bytes to the end of the array + padded_bytes[start_idx..].copy_from_slice(&v); + + // Sign extend if the number is negative (MSB is 1) + if !v.is_empty() && (v[0] & 0x80) != 0 { + // Fill the padding with 0xFF for negative numbers + for byte in &mut padded_bytes[..start_idx] { + *byte = 0xFF; + } + } + + Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128( + i128::from_be_bytes(padded_bytes), + )))) + } else { + Err(invalid_err_with_reason( + "bytes", + &format!( + "Decimal with precision {} must be exactly {} bytes, got {}", + precision, + required_bytes, + v.len() + ), + )) + } } _ => Err(invalid_err("bytes")), }, @@ -3330,6 +3376,238 @@ mod tests { } } + fn check_raw_literal_bytes_serde_via_avro( + input_bytes: Vec<u8>, + expected_literal: Literal, + expected_type: &Type, + ) { + use apache_avro::types::Value; + + // Create an Avro bytes value and deserialize it through the RawLiteral path + let avro_value = Value::Bytes(input_bytes); + let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap(); + let result = raw_literal.try_into(expected_type).unwrap(); + assert_eq!(result, Some(expected_literal)); + } + + fn check_raw_literal_bytes_error_via_avro(input_bytes: Vec<u8>, expected_type: &Type) { + use apache_avro::types::Value; + + let avro_value = Value::Bytes(input_bytes); + let raw_literal: _serde::RawLiteral = apache_avro::from_value(&avro_value).unwrap(); + let result = raw_literal.try_into(expected_type); + assert!(result.is_err(), "Expected error but got: {:?}", result); + } + + #[test] + fn test_raw_literal_bytes_binary() { + let bytes = vec![1u8, 2u8, 3u8, 4u8, 5u8]; + check_raw_literal_bytes_serde_via_avro( + bytes.clone(), + Literal::binary(bytes), + &Type::Primitive(PrimitiveType::Binary), + ); + } + + #[test] + fn 
test_raw_literal_bytes_binary_empty() { + let bytes = vec![]; + check_raw_literal_bytes_serde_via_avro( + bytes.clone(), + Literal::binary(bytes), + &Type::Primitive(PrimitiveType::Binary), + ); + } + + #[test] + fn test_raw_literal_bytes_fixed_correct_length() { + let bytes = vec![1u8, 2u8, 3u8, 4u8]; + check_raw_literal_bytes_serde_via_avro( + bytes.clone(), + Literal::fixed(bytes), + &Type::Primitive(PrimitiveType::Fixed(4)), + ); + } + + #[test] + fn test_raw_literal_bytes_fixed_wrong_length() { + let bytes = vec![1u8, 2u8, 3u8]; // 3 bytes, but expecting 4 + check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Fixed(4))); + } + + #[test] + fn test_raw_literal_bytes_fixed_empty_correct_length() { + let bytes = vec![]; + check_raw_literal_bytes_serde_via_avro( + bytes.clone(), + Literal::fixed(bytes), + &Type::Primitive(PrimitiveType::Fixed(0)), + ); + } + + #[test] + fn test_raw_literal_bytes_uuid_correct_length() { + let uuid_bytes = vec![ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, + 0xcd, 0xef, + ]; + let expected_uuid = u128::from_be_bytes([ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, + 0xcd, 0xef, + ]); + check_raw_literal_bytes_serde_via_avro( + uuid_bytes, + Literal::Primitive(PrimitiveLiteral::UInt128(expected_uuid)), + &Type::Primitive(PrimitiveType::Uuid), + ); + } + + #[test] + fn test_raw_literal_bytes_uuid_wrong_length() { + let bytes = vec![1u8, 2u8, 3u8]; // 3 bytes, but UUID needs 16 + check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Uuid)); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_4_scale_2() { + // Precision 4 requires 2 bytes + let decimal_bytes = vec![0x04, 0xd2]; // 1234 in 2 bytes + let expected_decimal = 1234i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + 
precision: 4, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_4_negative() { + // Precision 4 requires 2 bytes, negative number + let decimal_bytes = vec![0xfb, 0x2e]; // -1234 in 2 bytes + let expected_decimal = -1234i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + precision: 4, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_9_scale_2() { + // Precision 9 requires 4 bytes + let decimal_bytes = vec![0x00, 0x12, 0xd6, 0x87]; // 1234567 in 4 bytes + let expected_decimal = 1234567i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_18_scale_2() { + // Precision 18 requires 8 bytes + let decimal_bytes = vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xd2]; // 1234 in 8 bytes + let expected_decimal = 1234i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + precision: 18, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_38_scale_2() { + // Precision 38 requires 16 bytes (maximum precision) + let decimal_bytes = vec![ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0xd2, // 1234 in 16 bytes + ]; + let expected_decimal = 1234i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + precision: 38, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_1_scale_0() { + // Precision 1 requires 1 byte + let 
decimal_bytes = vec![0x07]; // 7 in 1 byte + let expected_decimal = 7i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + precision: 1, + scale: 0, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_precision_1_negative() { + // Precision 1 requires 1 byte, negative number + let decimal_bytes = vec![0xf9]; // -7 in 1 byte (two's complement) + let expected_decimal = -7i128; + check_raw_literal_bytes_serde_via_avro( + decimal_bytes, + Literal::Primitive(PrimitiveLiteral::Int128(expected_decimal)), + &Type::Primitive(PrimitiveType::Decimal { + precision: 1, + scale: 0, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_wrong_length() { + // 3 bytes provided, but precision 4 requires 2 bytes + let bytes = vec![1u8, 2u8, 3u8]; + check_raw_literal_bytes_error_via_avro( + bytes, + &Type::Primitive(PrimitiveType::Decimal { + precision: 4, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_decimal_wrong_length_too_few() { + // 1 byte provided, but precision 9 requires 4 bytes + let bytes = vec![0x42]; + check_raw_literal_bytes_error_via_avro( + bytes, + &Type::Primitive(PrimitiveType::Decimal { + precision: 9, + scale: 2, + }), + ); + } + + #[test] + fn test_raw_literal_bytes_unsupported_type() { + let bytes = vec![1u8, 2u8, 3u8, 4u8]; + check_raw_literal_bytes_error_via_avro(bytes, &Type::Primitive(PrimitiveType::Int)); + } + #[test] fn avro_convert_test_int() { check_convert_with_avro( From 1a04f5fe45b6bf21500a43988e803c3161edb9db Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Wed, 5 Nov 2025 11:09:26 +0000 Subject: [PATCH 3/4] Thanks Matt --- crates/iceberg/src/spec/values.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index 5b895552fb..f4c6f91ff0 100644 --- a/crates/iceberg/src/spec/values.rs +++ 
b/crates/iceberg/src/spec/values.rs @@ -2703,9 +2703,7 @@ mod _serde { let bytes: [u8; 16] = v.as_slice().try_into().map_err(|_| { invalid_err_with_reason("bytes", "UUID must be exactly 16 bytes") })?; - Ok(Some(Literal::Primitive(PrimitiveLiteral::UInt128( - u128::from_be_bytes(bytes), - )))) + Ok(Some(Literal::uuid(uuid::Uuid::from_bytes(bytes)))) } else { Err(invalid_err_with_reason( "bytes", From e380c8b7126e2c24851f0eeb8d82b6df2cd208ac Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 6 Nov 2025 17:44:46 +0000 Subject: [PATCH 4/4] Thanks Renjie! --- crates/iceberg/src/spec/values.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index f4c6f91ff0..2b4ac927c3 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -2712,13 +2712,7 @@ mod _serde { } } Type::Primitive(PrimitiveType::Decimal { precision, .. }) => { - let required_bytes = - Type::decimal_required_bytes(*precision).map_err(|e| { - invalid_err_with_reason( - "bytes", - &format!("Invalid decimal precision: {}", e), - ) - })? as usize; + let required_bytes = Type::decimal_required_bytes(*precision)? as usize; if v.len() == required_bytes { // Pad the bytes to 16 bytes (i128 size) with sign extension