Skip to content

Commit 0f1133e

Browse files
authored
fix: PartitionPruningStatistics pruning when multiple values are present (#18923)
Closes #18922.

Signed-off-by: Nimalan <nimalan.m@protonmail.com>
1 parent a060739 commit 0f1133e

File tree

1 file changed

+74
-1
lines changed

1 file changed

+74
-1
lines changed

datafusion/common/src/pruning.rs

Lines changed: 74 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -245,7 +245,7 @@ impl PruningStatistics for PartitionPruningStatistics {
245245
match acc {
246246
None => Some(Some(eq_result)),
247247
Some(acc_array) => {
248-
arrow::compute::kernels::boolean::and(&acc_array, &eq_result)
248+
arrow::compute::kernels::boolean::or_kleene(&acc_array, &eq_result)
249249
.map(Some)
250250
.ok()
251251
}
@@ -560,6 +560,79 @@ mod tests {
560560
assert_eq!(partition_stats.num_containers(), 2);
561561
}
562562

563+
/// Checks `contained` for a value set with several non-null literals, each
/// of which matches a different container.
///
/// A container counts as "contained" when its partition value equals *any*
/// member of the set, so the per-value equality results have to be OR-ed
/// together: both containers are expected to report `true`.
#[test]
564+
fn test_partition_pruning_statistics_multiple_positive_values() {
565+
// Two containers; each inner vec lists that container's values in field
// order, i.e. (a, b) = (1, 2) and (3, 4).
let partition_values = vec![
566+
vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
567+
vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
568+
];
569+
let partition_fields = vec![
570+
Arc::new(Field::new("a", DataType::Int32, false)),
571+
Arc::new(Field::new("b", DataType::Int32, false)),
572+
];
573+
let partition_stats =
574+
PartitionPruningStatistics::try_new(partition_values, partition_fields)
575+
.unwrap();
576+
577+
let column_a = Column::new_unqualified("a");
578+
579+
// 1 matches the first container's `a`, 3 matches the second container's `a`.
let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(3i32)]);
580+
let contained_a = partition_stats.contained(&column_a, &values).unwrap();
581+
// No single literal matches both containers; only the union of the
// per-literal results yields `[true, true]`.
let expected_contained_a = BooleanArray::from(vec![true, true]);
582+
assert_eq!(contained_a, expected_contained_a);
583+
}
584+
585+
/// Checks `contained` when the value set includes a NULL literal.
///
/// The expected arrays follow three-valued (Kleene) OR semantics across the
/// per-literal equality results: `true OR NULL` is `true`, while
/// `false OR NULL` stays NULL (unknown). When nothing but NULL is known for
/// every container, `contained` is expected to return `None`.
#[test]
586+
fn test_partition_pruning_statistics_null_in_values() {
587+
// Two containers; each inner vec lists that container's values in field
// order, i.e. (a, b, c) = (1, 2, 3) and (4, 5, 6).
let partition_values = vec![
588+
vec![
589+
ScalarValue::from(1i32),
590+
ScalarValue::from(2i32),
591+
ScalarValue::from(3i32),
592+
],
593+
vec![
594+
ScalarValue::from(4i32),
595+
ScalarValue::from(5i32),
596+
ScalarValue::from(6i32),
597+
],
598+
];
599+
let partition_fields = vec![
600+
Arc::new(Field::new("a", DataType::Int32, false)),
601+
Arc::new(Field::new("b", DataType::Int32, false)),
602+
Arc::new(Field::new("c", DataType::Int32, false)),
603+
];
604+
let partition_stats =
605+
PartitionPruningStatistics::try_new(partition_values, partition_fields)
606+
.unwrap();
607+
608+
let column_a = Column::new_unqualified("a");
609+
let column_b = Column::new_unqualified("b");
610+
let column_c = Column::new_unqualified("c");
611+
612+
// Column `a` holds [1, 4]: the literal 1 matches the first container, and
// the NULL literal leaves the second container's answer unknown.
let values_a = HashSet::from([ScalarValue::from(1i32), ScalarValue::Int32(None)]);
613+
let contained_a = partition_stats.contained(&column_a, &values_a).unwrap();
614+
// Expected: [true, NULL].
let mut builder = BooleanArray::builder(2);
615+
builder.append_value(true);
616+
builder.append_null();
617+
let expected_contained_a = builder.finish();
618+
assert_eq!(contained_a, expected_contained_a);
619+
620+
// If the NULL literal is compared first, it produces an all-NULL boolean array;
621+
// the accumulator must then still become `true` for the container matching 5.
// (`HashSet` iteration order is not guaranteed, so NULL may not actually be
// visited first; the expected result is the same in either order.)
let values_b = HashSet::from([ScalarValue::Int32(None), ScalarValue::from(5i32)]);
622+
let contained_b = partition_stats.contained(&column_b, &values_b).unwrap();
623+
// Column `b` holds [2, 5], so the expected result is [NULL, true].
let mut builder = BooleanArray::builder(2);
624+
builder.append_null();
625+
builder.append_value(true);
626+
let expected_contained_b = builder.finish();
627+
assert_eq!(contained_b, expected_contained_b);
628+
629+
// All matches are null, contained should return None
630+
let values_c = HashSet::from([ScalarValue::Int32(None)]);
631+
let contained_c = partition_stats.contained(&column_c, &values_c);
632+
assert!(contained_c.is_none());
633+
}
635+
563636
#[test]
564637
fn test_partition_pruning_statistics_empty() {
565638
let partition_values = vec![];

0 commit comments

Comments
 (0)