From deca6967edfd8a12cee565f7af530dd0af1ff2e6 Mon Sep 17 00:00:00 2001 From: Colin Marc Date: Thu, 31 Jul 2025 12:53:39 +0200 Subject: [PATCH 1/2] fix(iceberg-datafusion): handle timestamp predicates from DF DataFusion sometimes passes dates as string literals, but can also pass timestamp ScalarValues, which need to be converted to predicates correctly in order to enable partition pruning. --- Cargo.lock | 1 + crates/iceberg/src/spec/values.rs | 232 +++++++++++++++++- crates/integrations/datafusion/Cargo.toml | 1 + .../src/physical_plan/expr_to_predicate.rs | 140 +++++++++++ 4 files changed, 369 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1cacc75fb9..a47796c74c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3675,6 +3675,7 @@ version = "0.7.0" dependencies = [ "anyhow", "async-trait", + "chrono", "datafusion", "expect-test", "futures", diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index d06e754dcc..af368b0924 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -1195,11 +1195,57 @@ impl Datum { (PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => { Ok(Datum::i64_to_i32(*val)) } - (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => { - Ok(Datum::timestamp_micros(*val)) - } - (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => { - Ok(Datum::timestamptz_micros(*val)) + (PrimitiveLiteral::Long(val), source_type, target_type) => { + match (source_type, target_type) { + (_, PrimitiveType::Long) => Ok(Datum::long(*val)), + ( + PrimitiveType::Long + | PrimitiveType::Timestamp + | PrimitiveType::Timestamptz, + PrimitiveType::Timestamp, + ) => Ok(Datum::timestamp_micros(*val)), + ( + PrimitiveType::Long + | PrimitiveType::Timestamp + | PrimitiveType::Timestamptz, + PrimitiveType::Timestamptz, + ) => Ok(Datum::timestamptz_micros(*val)), + ( + PrimitiveType::Long + | PrimitiveType::TimestampNs + | PrimitiveType::TimestamptzNs, + PrimitiveType::TimestampNs, + ) => Ok(Datum::timestamp_nanos(*val)), + ( + PrimitiveType::Long + | PrimitiveType::TimestampNs + | PrimitiveType::TimestamptzNs, + PrimitiveType::TimestamptzNs, + ) => Ok(Datum::timestamptz_nanos(*val)), + ( + PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, + PrimitiveType::Timestamp, + ) => Ok(Datum::timestamp_micros(val / 1000)), + ( + PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, + PrimitiveType::Timestamptz, + ) => Ok(Datum::timestamptz_micros(val / 1000)), + ( + PrimitiveType::Timestamp | PrimitiveType::Timestamptz, + PrimitiveType::TimestampNs, + ) => Ok(Datum::timestamp_nanos(val * 1000)), + ( + PrimitiveType::Timestamp | PrimitiveType::Timestamptz, + PrimitiveType::TimestamptzNs, + ) => Ok(Datum::timestamptz_nanos(val * 1000)), + _ => Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Can't convert datum from {} type to {} type.", + self.r#type, target_primitive_type + ), + )), + } } // Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775 (PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => { @@ -3942,4 +3988,180 @@ mod tests { assert_eq!(double_sorted, double_expected); } + + #[test] + fn test_datum_timestamp_nanos_convert_to_timestamp_micros() { + let datum = Datum::timestamp_nanos(12345000); + + let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); + + let expected = Datum::timestamp_micros(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_nanos_convert_to_timestamptz_micros() { + let datum = Datum::timestamp_nanos(12345000); + + let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); + + let expected = Datum::timestamptz_micros(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamptz_nanos_convert_to_timestamp_micros() { + let datum = Datum::timestamptz_nanos(12345000); + + let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); + + let expected = Datum::timestamp_micros(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamptz_nanos_convert_to_timestamptz_micros() { + let datum = Datum::timestamptz_nanos(12345000); + + let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); + + let expected = Datum::timestamptz_micros(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_micros_convert_to_timestamp_nanos() { + let datum = Datum::timestamp_micros(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); + + let expected = Datum::timestamp_nanos(12345000); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_micros_convert_to_timestamptz_nanos() { + let datum = Datum::timestamp_micros(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); + + let expected = Datum::timestamptz_nanos(12345000); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamptz_micros_convert_to_timestamp_nanos() { + let datum = Datum::timestamptz_micros(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); + + let expected = Datum::timestamp_nanos(12345000); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamptz_micros_convert_to_timestamptz_nanos() { + let datum = Datum::timestamptz_micros(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); + + let expected = Datum::timestamptz_nanos(12345000); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_nanos_convert_to_timestamp_nanos() { + let datum = Datum::timestamp_nanos(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); + + let expected = Datum::timestamp_nanos(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_nanos_convert_to_timestamptz_nanos() { + let datum = Datum::timestamp_nanos(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); + + let expected = Datum::timestamptz_nanos(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamptz_nanos_convert_to_timestamp_nanos() { + let datum = Datum::timestamptz_nanos(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); + + let expected = Datum::timestamp_nanos(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamptz_nanos_convert_to_timestamptz_nanos() { + let datum = Datum::timestamptz_nanos(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); + + let expected = Datum::timestamptz_nanos(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_long_convert_to_timestamp_nanos() { + let datum = Datum::long(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); + + let expected = Datum::timestamp_nanos(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_long_convert_to_timestamptz_nanos() { + let datum = Datum::long(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); + + let expected = Datum::timestamptz_nanos(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_nanos_to_micros() { + let datum = Datum::timestamp_nanos(12345678); + + let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); + + let expected = Datum::timestamp_micros(12345); + + assert_eq!(result, expected); + } + + #[test] + fn test_datum_timestamp_micros_to_nanos() { + let datum = Datum::timestamp_micros(12345); + + let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); + + let expected = Datum::timestamp_nanos(12345000); + + assert_eq!(result, expected); + } } diff --git a/crates/integrations/datafusion/Cargo.toml b/crates/integrations/datafusion/Cargo.toml index 0ee1738b4f..d5260f7770 100644 --- a/crates/integrations/datafusion/Cargo.toml +++ b/crates/integrations/datafusion/Cargo.toml @@ -31,6 +31,7 @@ repository = { workspace = true } [dependencies] anyhow = { workspace = true } async-trait = { workspace = true } +chrono = { workspace = true } datafusion = { workspace = true } futures = { workspace = true } iceberg = { workspace = true } diff --git a/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs b/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs index 2974dd2642..8eec27a8da 100644 --- a/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs +++ b/crates/integrations/datafusion/src/physical_plan/expr_to_predicate.rs @@ -17,6 +17,7 @@ use std::vec; +use chrono::{FixedOffset, TimeZone as _}; use datafusion::arrow::datatypes::DataType; use datafusion::logical_expr::{Expr, Operator}; use datafusion::scalar::ScalarValue; @@ -201,6 +202,9 @@ fn reverse_predicate_operator(op: PredicateOperator) -> PredicateOperator { } const MILLIS_PER_DAY: i64 = 24 * 60 * 60 * 1000; +const MICROS_PER_SECOND: i64 = 1_000_000; +const MICROS_PER_MILLISECOND: i64 = 1_000; + /// Convert a scalar value to an iceberg datum. fn scalar_value_to_datum(value: &ScalarValue) -> Option { match value { @@ -214,18 +218,46 @@ fn scalar_value_to_datum(value: &ScalarValue) -> Option { ScalarValue::LargeUtf8(Some(v)) => Some(Datum::string(v.clone())), ScalarValue::Date32(Some(v)) => Some(Datum::date(*v)), ScalarValue::Date64(Some(v)) => Some(Datum::date((*v / MILLIS_PER_DAY) as i32)), + ScalarValue::TimestampSecond(Some(v), tz) => { + interpret_timestamptz_micros(v.checked_mul(MICROS_PER_SECOND)?, tz.as_deref()) + } + ScalarValue::TimestampMillisecond(Some(v), tz) => { + interpret_timestamptz_micros(v.checked_mul(MICROS_PER_MILLISECOND)?, tz.as_deref()) + } + ScalarValue::TimestampMicrosecond(Some(v), tz) => { + interpret_timestamptz_micros(*v, tz.as_deref()) + } + ScalarValue::TimestampNanosecond(Some(v), Some(_)) => Some(Datum::timestamptz_nanos(*v)), + ScalarValue::TimestampNanosecond(Some(v), None) => Some(Datum::timestamp_nanos(*v)), _ => None, } } +fn interpret_timestamptz_micros(micros: i64, tz: Option>) -> Option { + let offset = tz + .as_ref() + .and_then(|s| s.as_ref().parse::().ok()); + + match offset { + Some(off) => off + .timestamp_micros(micros) + .single() + .map(Datum::timestamptz_from_datetime), + None => Some(Datum::timestamp_micros(micros)), + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; + use chrono::DateTime; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use datafusion::common::DFSchema; use datafusion::logical_expr::utils::split_conjunction; + use datafusion::logical_expr::{Operator, binary_expr, col, lit}; use datafusion::prelude::{Expr, SessionContext}; + use datafusion::scalar::ScalarValue; use iceberg::expr::{Predicate, Reference}; use iceberg::spec::Datum; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; @@ -245,6 +277,42 @@ mod tests { Field::new("ts", DataType::Timestamp(TimeUnit::Second, None), true).with_metadata( HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())]), ), + Field::new( + "ts_ms", + DataType::Timestamp(TimeUnit::Millisecond, None), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "4".to_string(), + )])), + Field::new( + "ts_us", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "5".to_string(), + )])), + Field::new( + "ts_ns", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "6".to_string(), + )])), + Field::new( + "ts_tz", + DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), + true, + ) + .with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "7".to_string(), + )])), ]); DFSchema::try_from_qualified_schema("my_table", &arrow_schema).unwrap() } @@ -429,4 +497,76 @@ mod tests { let predicate = convert_to_iceberg_predicate(sql); assert_eq!(predicate, None); } + + #[test] + fn test_predicate_conversion_with_timestamp() { + // 2023-01-01 12:00:00 UTC + let timestamp_scalar = ScalarValue::TimestampSecond(Some(1672574400), None); + let dt = DateTime::parse_from_rfc3339("2023-01-01T12:00:00+00:00").unwrap(); + + let expr = binary_expr(col("ts"), Operator::Gt, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = + Reference::new("ts").greater_than(Datum::timestamp_from_datetime(dt.naive_utc())); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_milliseconds() { + // 2023-01-01 12:00:00 UTC + let timestamp_scalar = ScalarValue::TimestampMillisecond(Some(1672574400000), None); + let dt = DateTime::parse_from_rfc3339("2023-01-01T12:00:00+00:00").unwrap(); + + let expr = binary_expr(col("ts_ms"), Operator::LtEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = Reference::new("ts_ms") + .less_than_or_equal_to(Datum::timestamp_from_datetime(dt.naive_utc())); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_nanoseconds() { + // 2023-01-01 12:00:00 UTC + let timestamp_scalar = ScalarValue::TimestampNanosecond(Some(1672574400000000000), None); + let dt = DateTime::parse_from_rfc3339("2023-01-01T12:00:00+00:00").unwrap(); + + let expr = binary_expr(col("ts_ns"), Operator::NotEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = Reference::new("ts_ns") + .not_equal_to(Datum::timestamp_nanos(dt.timestamp_nanos_opt().unwrap())); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_with_timezone() { + // 2023-01-01 13:00:00 +01:00 + let timestamp_scalar = + ScalarValue::TimestampSecond(Some(1672574400), Some("+01:00".into())); + let dt = DateTime::parse_from_rfc3339("2023-01-01T13:00:00+01:00").unwrap(); + + let expr = binary_expr(col("ts_tz"), Operator::GtEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = + Reference::new("ts_tz").greater_than_or_equal_to(Datum::timestamptz_from_datetime(dt)); + assert_eq!(predicate, expected_predicate); + } + + #[test] + fn test_predicate_conversion_with_timestamp_nanoseconds_with_timezone() { + // 2023-01-01 13:00:00 +01:00 + let timestamp_scalar = + ScalarValue::TimestampNanosecond(Some(1672574400000000000), Some("+01:00".into())); + let dt = DateTime::parse_from_rfc3339("2023-01-01T13:00:00+01:00").unwrap(); + + let expr = binary_expr(col("ts_tz"), Operator::GtEq, lit(timestamp_scalar)); + let exprs: Vec = split_conjunction(&expr).into_iter().cloned().collect(); + let predicate = convert_filters_to_predicate(&exprs[..]).unwrap(); + let expected_predicate = Reference::new("ts_tz") + .greater_than_or_equal_to(Datum::timestamptz_nanos(dt.timestamp_nanos_opt().unwrap())); + assert_eq!(predicate, expected_predicate); + } } From 86fac9aa9a8071152d64bd3f104d719a26595e5d Mon Sep 17 00:00:00 2001 From: Colin Marc Date: Thu, 31 Jul 2025 13:53:00 +0200 Subject: [PATCH 2/2] fix: handle converting Datum::date to timestamp and vice versa This helps with predicate expressions such as `date > DATE_TRUNC('day', ts)`. --- crates/iceberg/src/spec/values.rs | 701 +++++++++++++----------------- 1 file changed, 310 insertions(+), 391 deletions(-) diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index af368b0924..ba436adf29 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -60,6 +60,10 @@ const INT_MIN: i32 = -2147483648; const LONG_MAX: i64 = 9223372036854775807; const LONG_MIN: i64 = -9223372036854775808; +const MICROS_PER_DAY: i64 = 24 * 60 * 60 * 1_000_000; +const NANOS_PER_MICRO: i64 = 1000; +const NANOS_PER_DAY: i64 = NANOS_PER_MICRO * MICROS_PER_DAY; + /// Values present in iceberg type #[derive(Clone, Debug, PartialOrd, PartialEq, Hash, Eq)] pub enum PrimitiveLiteral { @@ -1189,63 +1193,11 @@ impl Datum { match target_type { Type::Primitive(target_primitive_type) => { match (&self.literal, &self.r#type, target_primitive_type) { - (PrimitiveLiteral::Int(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)), - (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)), - (PrimitiveLiteral::Int(val), _, PrimitiveType::Long) => Ok(Datum::long(*val)), - (PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => { - Ok(Datum::i64_to_i32(*val)) + (PrimitiveLiteral::Int(val), source_type, target_type) => { + convert_int(*val, source_type, target_type) } (PrimitiveLiteral::Long(val), source_type, target_type) => { - match (source_type, target_type) { - (_, PrimitiveType::Long) => Ok(Datum::long(*val)), - ( - PrimitiveType::Long - | PrimitiveType::Timestamp - | PrimitiveType::Timestamptz, - PrimitiveType::Timestamp, - ) => Ok(Datum::timestamp_micros(*val)), - ( - PrimitiveType::Long - | PrimitiveType::Timestamp - | PrimitiveType::Timestamptz, - PrimitiveType::Timestamptz, - ) => Ok(Datum::timestamptz_micros(*val)), - ( - PrimitiveType::Long - | PrimitiveType::TimestampNs - | PrimitiveType::TimestamptzNs, - PrimitiveType::TimestampNs, - ) => Ok(Datum::timestamp_nanos(*val)), - ( - PrimitiveType::Long - | PrimitiveType::TimestampNs - | PrimitiveType::TimestamptzNs, - PrimitiveType::TimestamptzNs, - ) => Ok(Datum::timestamptz_nanos(*val)), - ( - PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, - PrimitiveType::Timestamp, - ) => Ok(Datum::timestamp_micros(val / 1000)), - ( - PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, - PrimitiveType::Timestamptz, - ) => Ok(Datum::timestamptz_micros(val / 1000)), - ( - PrimitiveType::Timestamp | PrimitiveType::Timestamptz, - PrimitiveType::TimestampNs, - ) => Ok(Datum::timestamp_nanos(val * 1000)), - ( - PrimitiveType::Timestamp | PrimitiveType::Timestamptz, - PrimitiveType::TimestamptzNs, - ) => Ok(Datum::timestamptz_nanos(val * 1000)), - _ => Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Can't convert datum from {} type to {} type.", - self.r#type, target_primitive_type - ), - )), - } + convert_long(*val, source_type, target_type) } // Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775 (PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => { @@ -1321,6 +1273,105 @@ impl Datum { } } +/// Converts an int literal between two [PrimitiveType]s. +fn convert_int( + val: i32, + source_type: &PrimitiveType, + target_type: &PrimitiveType, +) -> Result { + let datum = match (source_type, target_type) { + (_, PrimitiveType::Int) => Datum::int(val), + (_, PrimitiveType::Date) => Datum::date(val), + (_, PrimitiveType::Long) => Datum::long(val), + (PrimitiveType::Date, PrimitiveType::Timestamp) => Datum::timestamp_micros( + (val as i64) + .checked_mul(MICROS_PER_DAY) + .ok_or_else(overflow)?, + ), + (PrimitiveType::Date, PrimitiveType::Timestamptz) => Datum::timestamptz_micros( + (val as i64) + .checked_mul(MICROS_PER_DAY) + .ok_or_else(overflow)?, + ), + (PrimitiveType::Date, PrimitiveType::TimestampNs) => Datum::timestamp_nanos( + (val as i64) + .checked_mul(NANOS_PER_DAY) + .ok_or_else(overflow)?, + ), + (PrimitiveType::Date, PrimitiveType::TimestamptzNs) => Datum::timestamptz_nanos( + (val as i64) + .checked_mul(NANOS_PER_DAY) + .ok_or_else(overflow)?, + ), + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("Can't convert datum from {source_type} type to {target_type} type.",), + )); + } + }; + + Ok(datum) +} + +/// Converts a long literal between two [PrimitiveType]s. +fn convert_long( + val: i64, + source_type: &PrimitiveType, + target_type: &PrimitiveType, +) -> Result { + let datum = match (source_type, target_type) { + (_, PrimitiveType::Int) => Datum::i64_to_i32(val), + (_, PrimitiveType::Long) => Datum::long(val), + ( + PrimitiveType::Long | PrimitiveType::Timestamp | PrimitiveType::Timestamptz, + PrimitiveType::Timestamp, + ) => Datum::timestamp_micros(val), + ( + PrimitiveType::Long | PrimitiveType::Timestamp | PrimitiveType::Timestamptz, + PrimitiveType::Timestamptz, + ) => Datum::timestamptz_micros(val), + ( + PrimitiveType::Long | PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, + PrimitiveType::TimestampNs, + ) => Datum::timestamp_nanos(val), + ( + PrimitiveType::Long | PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, + PrimitiveType::TimestamptzNs, + ) => Datum::timestamptz_nanos(val), + (PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, PrimitiveType::Timestamp) => { + Datum::timestamp_micros(val / NANOS_PER_MICRO) + } + (PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, PrimitiveType::Timestamptz) => { + Datum::timestamptz_micros(val / NANOS_PER_MICRO) + } + (PrimitiveType::Timestamp | PrimitiveType::Timestamptz, PrimitiveType::TimestampNs) => { + Datum::timestamp_nanos(val.checked_mul(NANOS_PER_MICRO).ok_or_else(overflow)?) + } + (PrimitiveType::Timestamp | PrimitiveType::Timestamptz, PrimitiveType::TimestamptzNs) => { + Datum::timestamptz_nanos(val.checked_mul(NANOS_PER_MICRO).ok_or_else(overflow)?) + } + (PrimitiveType::Timestamp | PrimitiveType::Timestamptz, PrimitiveType::Date) => { + Datum::date((val / MICROS_PER_DAY) as i32) + } + (PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs, PrimitiveType::Date) => { + Datum::date((val / (NANOS_PER_DAY)) as i32) + } + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!("Can't convert datum from {source_type} type to {target_type} type."), + )); + } + }; + + Ok(datum) +} + +fn overflow() -> Error { + Error::new(ErrorKind::DataInvalid, "integer overflow") +} + /// Map is a collection of key-value pairs with a key type and a value type. /// It used in Literal::Map, to make it hashable, the order of key-value pairs is stored in a separate vector /// so that we can hash the map in a deterministic way. But it also means that the order of key-value pairs is matter @@ -3757,169 +3808,213 @@ mod tests { test_fn(datum); } - #[test] - fn test_datum_date_convert_to_int() { - let datum_date = Datum::date(12345); - - let result = datum_date.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::int(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_int_convert_to_date() { - let datum_int = Datum::int(12345); - - let result = datum_int.to(&Primitive(PrimitiveType::Date)).unwrap(); - - let expected = Datum::date(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_int() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::int(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_int_above_max() { - let datum = Datum::long(INT_MAX as i64 + 1); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_int_below_min() { - let datum = Datum::long(INT_MIN as i64 - 1); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_timestamp() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); + macro_rules! datum_convert_tests { + ($($name:ident: ($source:expr => $target_type:ident, $expected:expr),)*) => { + $( + #[test] + fn $name() { + let source = $source; + let target_type = PrimitiveType::$target_type; + let expected = $expected; - let expected = Datum::timestamp_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_timestamptz() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); - - let expected = Datum::timestamptz_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_decimal_convert_to_long() { - let datum = Datum::decimal(12345).unwrap(); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::long(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_decimal_convert_to_long_above_max() { - let datum = Datum::decimal(LONG_MAX as i128 + 1).unwrap(); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_decimal_convert_to_long_below_min() { - let datum = Datum::decimal(LONG_MIN as i128 - 1).unwrap(); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_boolean() { - let datum = Datum::string("true"); - - let result = datum.to(&Primitive(PrimitiveType::Boolean)).unwrap(); - - let expected = Datum::bool(true); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_int() { - let datum = Datum::string("12345"); - - let result = datum.to(&Primitive(PrimitiveType::Int)).unwrap(); - - let expected = Datum::int(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_long() { - let datum = Datum::string("12345"); - - let result = datum.to(&Primitive(PrimitiveType::Long)).unwrap(); - - let expected = Datum::long(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_string_convert_to_timestamp() { - let datum = Datum::string("1925-05-20T19:25:00.000"); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); - - let expected = Datum::timestamp_micros(-1407990900000000); - - assert_eq!(result, expected); + let result = source.to(&Primitive(target_type)).unwrap(); + assert_eq!(result, expected); + } + )* + } } - #[test] - fn test_datum_string_convert_to_timestamptz() { - let datum = Datum::string("1925-05-20T19:25:00.000 UTC"); - - let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); - - let expected = Datum::timestamptz_micros(-1407990900000000); - - assert_eq!(result, expected); + datum_convert_tests! { + // Date conversions + convert_date_to_int: ( + Datum::date(12345) => Int, + Datum::int(12345) + ), + convert_int_to_date: ( + Datum::int(12345) => Date, + Datum::date(12345) + ), + convert_date_to_timestamp: ( + Datum::date(1) => Timestamp, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("1970-01-02T00:00:00Z") + .unwrap() + .naive_utc() + ) + ), + convert_date_to_timestamptz: ( + Datum::date(1) => Timestamptz, + Datum::timestamptz_from_str("1970-01-02T00:00:00Z").unwrap() + ), + convert_date_to_timestamp_nanos: ( + Datum::date(1) => TimestampNs, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("1970-01-02T00:00:00Z") + .unwrap() + .naive_utc() + ) + .to(&Primitive(PrimitiveType::TimestampNs)) + .unwrap() + ), + convert_date_to_timestamptz_nanos: ( + Datum::date(1) => TimestamptzNs, + Datum::timestamptz_from_datetime( + DateTime::parse_from_rfc3339("1970-01-02T00:00:00Z").unwrap() + ) + .to(&Primitive(PrimitiveType::TimestamptzNs)) + .unwrap() + ), + convert_date_negative_to_timestamp: ( + Datum::date(-1) => Timestamp, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("1969-12-31T00:00:00Z") + .unwrap() + .naive_utc() + ) + ), + convert_date_to_timestamp_year_9999: ( + Datum::date(2932896) => Timestamp, + Datum::timestamp_from_datetime( + DateTime::parse_from_rfc3339("9999-12-31T00:00:00Z") + .unwrap() + .naive_utc() + ) + ), + // Long conversions + convert_long_to_int: ( + Datum::long(12345) => Int, + Datum::int(12345) + ), + convert_long_to_int_above_max: ( + Datum::long(INT_MAX as i64 + 1) => Int, + Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax) + ), + convert_long_to_int_below_min: ( + Datum::long(INT_MIN as i64 - 1) => Int, + Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin) + ), + convert_long_to_timestamp: ( + Datum::long(12345) => Timestamp, + Datum::timestamp_micros(12345) + ), + convert_long_to_timestamptz: ( + Datum::long(12345) => Timestamptz, + Datum::timestamptz_micros(12345) + ), + convert_long_to_timestamp_nanos: ( + Datum::long(12345) => TimestampNs, + Datum::timestamp_nanos(12345) + ), + convert_long_to_timestamptz_nanos: ( + Datum::long(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345) + ), + // Decimal conversions + convert_decimal_to_long: ( + Datum::decimal(12345).unwrap() => Long, + Datum::long(12345) + ), + convert_decimal_to_long_above_max: ( + Datum::decimal(LONG_MAX as i128 + 1).unwrap() => Long, + Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax) + ), + convert_decimal_to_long_below_min: ( + Datum::decimal(LONG_MIN as i128 - 1).unwrap() => Long, + Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin) + ), + // String conversions + convert_string_to_boolean: ( + Datum::string("true") => Boolean, + Datum::bool(true) + ), + convert_string_to_int: ( + Datum::string("12345") => Int, + Datum::int(12345) + ), + convert_string_to_long: ( + Datum::string("12345") => Long, + Datum::long(12345) + ), + convert_string_to_timestamp: ( + Datum::string("1925-05-20T19:25:00.000") => Timestamp, + Datum::timestamp_micros(-1407990900000000) + ), + convert_string_to_timestamptz: ( + Datum::string("1925-05-20T19:25:00.000 UTC") => Timestamptz, + Datum::timestamptz_micros(-1407990900000000) + ), + // Timestamp conversions (micros to nanos) + convert_timestamp_micros_to_timestamp_nanos: ( + Datum::timestamp_micros(12345) => TimestampNs, + Datum::timestamp_nanos(12345000) + ), + convert_timestamp_micros_to_timestamptz_nanos: ( + Datum::timestamp_micros(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345000) + ), + convert_timestamptz_micros_to_timestamp_nanos: ( + Datum::timestamptz_micros(12345) => TimestampNs, + Datum::timestamp_nanos(12345000) + ), + convert_timestamptz_micros_to_timestamptz_nanos: ( + Datum::timestamptz_micros(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345000) + ), + // Timestamp conversions (nanos to micros) + convert_timestamp_nanos_to_timestamp_micros: ( + Datum::timestamp_nanos(12345000) => Timestamp, + Datum::timestamp_micros(12345) + ), + test_datum_timestamp_micros_to_nanos: ( + Datum::timestamp_nanos(12345678) => Timestamp, + Datum::timestamp_micros(12345) + ), + convert_timestamp_nanos_to_timestamptz_micros: ( + Datum::timestamp_nanos(12345000) => Timestamptz, + Datum::timestamptz_micros(12345) + ), + convert_timestamptz_nanos_to_timestamp_micros: ( + Datum::timestamptz_nanos(12345000) => Timestamp, + Datum::timestamp_micros(12345) + ), + convert_timestamptz_nanos_to_timestamptz_micros: ( + Datum::timestamptz_nanos(12345000) => Timestamptz, + Datum::timestamptz_micros(12345) + ), + // Timestamp conversions (nanos to nanos) + convert_timestamp_nanos_to_timestamp_nanos: ( + Datum::timestamp_nanos(12345) => TimestampNs, + Datum::timestamp_nanos(12345) + ), + convert_timestamp_nanos_to_timestamptz_nanos: ( + Datum::timestamp_nanos(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345) + ), + convert_timestamptz_nanos_to_timestamp_nanos: ( + Datum::timestamptz_nanos(12345) => TimestampNs, + Datum::timestamp_nanos(12345) + ), + convert_timestamptz_nanos_to_timestamptz_nanos: ( + Datum::timestamptz_nanos(12345) => TimestamptzNs, + Datum::timestamptz_nanos(12345) + ), + // Timestamp to date conversions + convert_timestamp_to_date: ( + Datum::timestamp_micros(24 * 60 * 60 * 1_000_000) => Date, + Datum::date(1) + ), + convert_timestamptz_to_date: ( + Datum::timestamptz_micros(24 * 60 * 60 * 1_000_000) => Date, + Datum::date(1) + ), + convert_timestamp_nanos_to_date: ( + Datum::timestamp_nanos(24 * 60 * 60 * 1_000_000_000) => Date, + Datum::date(1) + ), + convert_timestamptz_nanos_to_date: ( + Datum::timestamptz_nanos(24 * 60 * 60 * 1_000_000_000) => Date, + Datum::date(1) + ), } #[test] @@ -3988,180 +4083,4 @@ mod tests { assert_eq!(double_sorted, double_expected); } - - #[test] - fn test_datum_timestamp_nanos_convert_to_timestamp_micros() { - let datum = Datum::timestamp_nanos(12345000); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); - - let expected = Datum::timestamp_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_nanos_convert_to_timestamptz_micros() { - let datum = Datum::timestamp_nanos(12345000); - - let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); - - let expected = Datum::timestamptz_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamptz_nanos_convert_to_timestamp_micros() { - let datum = Datum::timestamptz_nanos(12345000); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); - - let expected = Datum::timestamp_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamptz_nanos_convert_to_timestamptz_micros() { - let datum = Datum::timestamptz_nanos(12345000); - - let result = datum.to(&Primitive(PrimitiveType::Timestamptz)).unwrap(); - - let expected = Datum::timestamptz_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_micros_convert_to_timestamp_nanos() { - let datum = Datum::timestamp_micros(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); - - let expected = Datum::timestamp_nanos(12345000); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_micros_convert_to_timestamptz_nanos() { - let datum = Datum::timestamp_micros(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); - - let expected = Datum::timestamptz_nanos(12345000); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamptz_micros_convert_to_timestamp_nanos() { - let datum = Datum::timestamptz_micros(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); - - let expected = Datum::timestamp_nanos(12345000); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamptz_micros_convert_to_timestamptz_nanos() { - let datum = Datum::timestamptz_micros(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); - - let expected = Datum::timestamptz_nanos(12345000); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_nanos_convert_to_timestamp_nanos() { - let datum = Datum::timestamp_nanos(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); - - let expected = Datum::timestamp_nanos(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_nanos_convert_to_timestamptz_nanos() { - let datum = Datum::timestamp_nanos(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); - - let expected = Datum::timestamptz_nanos(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamptz_nanos_convert_to_timestamp_nanos() { - let datum = Datum::timestamptz_nanos(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); - - let expected = Datum::timestamp_nanos(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamptz_nanos_convert_to_timestamptz_nanos() { - let datum = Datum::timestamptz_nanos(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); - - let expected = Datum::timestamptz_nanos(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_timestamp_nanos() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); - - let expected = Datum::timestamp_nanos(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_long_convert_to_timestamptz_nanos() { - let datum = Datum::long(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestamptzNs)).unwrap(); - - let expected = Datum::timestamptz_nanos(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_nanos_to_micros() { - let datum = Datum::timestamp_nanos(12345678); - - let result = datum.to(&Primitive(PrimitiveType::Timestamp)).unwrap(); - - let expected = Datum::timestamp_micros(12345); - - assert_eq!(result, expected); - } - - #[test] - fn test_datum_timestamp_micros_to_nanos() { - let datum = Datum::timestamp_micros(12345); - - let result = datum.to(&Primitive(PrimitiveType::TimestampNs)).unwrap(); - - let expected = Datum::timestamp_nanos(12345000); - - assert_eq!(result, expected); - } }