Skip to content

Commit e43e8bf

Browse files
authored
Add additional test coverage of multi-value PartitionPruningStats (#19021)
## Which issue does this PR close? - Follow-on to #18923. ## Rationale for this change I was confused about some of the tests for `PartitionPruningStatistics`, so let's add some more comments to explain what they are doing, and add additional coverage for multi-value columns. ## What changes are included in this PR? Add a new test. ## Are these changes tested? Only tests. ## Are there any user-facing changes? No.
1 parent 477053d commit e43e8bf

File tree

1 file changed

+31
-16
lines changed

1 file changed

+31
-16
lines changed

datafusion/common/src/pruning.rs

Lines changed: 31 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -493,8 +493,14 @@ mod tests {
493493
use arrow::datatypes::{DataType, Field};
494494
use std::sync::Arc;
495495

496-
#[test]
497-
fn test_partition_pruning_statistics() {
496+
/// Returns a `PartitionPruningStatistics` for two columns, `a` and `b`,
497+
/// with one row of partition values per container:
498+
///
499+
/// | a | b |
500+
/// | - | - |
501+
/// | 1 | 2 |
502+
/// | 3 | 4 |
503+
fn partition_pruning_statistics_setup() -> PartitionPruningStatistics {
498504
let partition_values = vec![
499505
vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
500506
vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
@@ -503,9 +509,12 @@ mod tests {
503509
Arc::new(Field::new("a", DataType::Int32, false)),
504510
Arc::new(Field::new("b", DataType::Int32, false)),
505511
];
506-
let partition_stats =
507-
PartitionPruningStatistics::try_new(partition_values, partition_fields)
508-
.unwrap();
512+
PartitionPruningStatistics::try_new(partition_values, partition_fields).unwrap()
513+
}
514+
515+
#[test]
516+
fn test_partition_pruning_statistics() {
517+
let partition_stats = partition_pruning_statistics_setup();
509518

510519
let column_a = Column::new_unqualified("a");
511520
let column_b = Column::new_unqualified("b");
@@ -562,26 +571,32 @@ mod tests {
562571

563572
#[test]
564573
fn test_partition_pruning_statistics_multiple_positive_values() {
565-
let partition_values = vec![
566-
vec![ScalarValue::from(1i32), ScalarValue::from(2i32)],
567-
vec![ScalarValue::from(3i32), ScalarValue::from(4i32)],
568-
];
569-
let partition_fields = vec![
570-
Arc::new(Field::new("a", DataType::Int32, false)),
571-
Arc::new(Field::new("b", DataType::Int32, false)),
572-
];
573-
let partition_stats =
574-
PartitionPruningStatistics::try_new(partition_values, partition_fields)
575-
.unwrap();
574+
let partition_stats = partition_pruning_statistics_setup();
576575

577576
let column_a = Column::new_unqualified("a");
578577

578+
// Container 0 has `a` = 1 and container 1 has `a` = 3; both values are in the set {1, 3}, so `contained` is `true` for both containers
579579
let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(3i32)]);
580580
let contained_a = partition_stats.contained(&column_a, &values).unwrap();
581581
let expected_contained_a = BooleanArray::from(vec![true, true]);
582582
assert_eq!(contained_a, expected_contained_a);
583583
}
584584

585+
#[test]
586+
fn test_partition_pruning_statistics_multiple_negative_values() {
587+
let partition_stats = partition_pruning_statistics_setup();
588+
589+
let column_a = Column::new_unqualified("a");
590+
591+
// Container 0 has `a` = 1 and container 1 has `a` = 3.
592+
// 1 is in the set {1, 2}, so the first container contains ONLY values from the set (expect `true`);
593+
// 3 is not in the set, so the second container does not (expect `false`).
594+
let values = HashSet::from([ScalarValue::from(1i32), ScalarValue::from(2i32)]);
595+
let contained_a = partition_stats.contained(&column_a, &values).unwrap();
596+
let expected_contained_a = BooleanArray::from(vec![true, false]);
597+
assert_eq!(contained_a, expected_contained_a);
598+
}
599+
585600
#[test]
586601
fn test_partition_pruning_statistics_null_in_values() {
587602
let partition_values = vec![

0 commit comments

Comments
 (0)