From 3bc15207b531d1c3b96a033349f4b47b43a5e152 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Tue, 27 Feb 2024 10:10:48 +0100 Subject: [PATCH 1/4] `k_smallest` variants: update documentation --- src/lib.rs | 100 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 90 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 71c8234f5..3b13e6a37 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2981,9 +2981,27 @@ pub trait Itertools: Iterator { /// Sort the k smallest elements into a new iterator using the provided comparison. /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that - /// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity. + /// [`k_smallest`](Itertools::k_smallest) corresponds to `self.sorted().take(k)`, + /// in both semantics and complexity. + /// /// Particularly, a custom heap implementation ensures the comparison is not cloned. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_smallest = numbers + /// .into_iter() + /// .k_smallest_by(5, |a, b| (a % 7).cmp(&(b % 7)).then(a.cmp(b))); + /// + /// itertools::assert_equal(five_smallest, vec![0, 7, 14, 1, 8]); + /// ``` #[cfg(feature = "use_alloc")] fn k_smallest_by(self, k: usize, cmp: F) -> VecIntoIter where @@ -2993,10 +3011,29 @@ pub trait Itertools: Iterator { k_smallest::k_smallest_general(self, k, cmp).into_iter() } - /// Return the elements producing the k smallest outputs of the provided function + /// Return the elements producing the k smallest outputs of the provided function. /// - /// This corresponds to `self.sorted_by_key(cmp).take(k)` in the same way that - /// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and time complexity. + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// This corresponds to `self.sorted_by_key(key).take(k)` in the same way that + /// [`k_smallest`](Itertools::k_smallest) corresponds to `self.sorted().take(k)`, + /// in both semantics and complexity. + /// + /// Particularly, a custom heap implementation ensures the comparison is not cloned. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_smallest = numbers + /// .into_iter() + /// .k_smallest_by_key(5, |n| (n % 7, *n)); + /// + /// itertools::assert_equal(five_smallest, vec![0, 7, 14, 1, 8]); + /// ``` #[cfg(feature = "use_alloc")] fn k_smallest_by_key(self, k: usize, key: F) -> VecIntoIter where @@ -3008,9 +3045,15 @@ pub trait Itertools: Iterator { } /// Sort the k largest elements into a new iterator, in descending order. - /// Semantically equivalent to `k_smallest` with a reversed `Ord` - /// However, this is implemented by way of a custom binary heap - /// which does not have the same performance characteristics for very large `Self::Item` + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// It is semantically equivalent to [`k_smallest`](Itertools::k_smallest) + /// with a reversed `Ord`. + /// However, this is implemented with a custom binary heap which does not + /// have the same performance characteristics for very large `Self::Item`. + /// /// ``` /// use itertools::Itertools; /// @@ -3021,7 +3064,7 @@ pub trait Itertools: Iterator { /// .into_iter() /// .k_largest(5); /// - /// itertools::assert_equal(five_largest, vec![14,13,12,11,10]); + /// itertools::assert_equal(five_largest, vec![14, 13, 12, 11, 10]); /// ``` #[cfg(feature = "use_alloc")] fn k_largest(self, k: usize) -> VecIntoIter @@ -3033,7 +3076,25 @@ pub trait Itertools: Iterator { } /// Sort the k largest elements into a new iterator using the provided comparison. - /// Functionally equivalent to `k_smallest_by` with a reversed `Ord` + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// Functionally equivalent to [`k_smallest_by`](Itertools::k_smallest_by) + /// with a reversed `Ord`. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_largest = numbers + /// .into_iter() + /// .k_largest_by(5, |a, b| (a % 7).cmp(&(b % 7)).then(a.cmp(b))); + /// + /// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]); + /// ``` #[cfg(feature = "use_alloc")] fn k_largest_by(self, k: usize, cmp: F) -> VecIntoIter where @@ -3043,7 +3104,26 @@ pub trait Itertools: Iterator { self.k_smallest_by(k, move |a, b| cmp(b, a)) } - /// Return the elements producing the k largest outputs of the provided function + /// Return the elements producing the k largest outputs of the provided function. + /// + /// The sorted iterator, if directly collected to a `Vec`, is converted + /// without any extra copying or allocation cost. + /// + /// Functionally equivalent to [`k_smallest_by_key`](Itertools::k_smallest_by_key) + /// with a reversed `Ord`. + /// + /// ``` + /// use itertools::Itertools; + /// + /// // A random permutation of 0..15 + /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5]; + /// + /// let five_largest = numbers + /// .into_iter() + /// .k_largest_by_key(5, |n| (n % 7, *n)); + /// + /// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]); + /// ``` #[cfg(feature = "use_alloc")] fn k_largest_by_key(self, k: usize, key: F) -> VecIntoIter where From 7382e8d40fd5af776cc031729e36d5f74b50f943 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:07:40 +0100 Subject: [PATCH 2/4] `k_smallest` variants with `FnMut` functions I totally missed this in my review earlier. We usually allow `FnMut` functions rather than only `Fn` ones. --- src/k_smallest.rs | 4 ++-- src/lib.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/k_smallest.rs b/src/k_smallest.rs index 766021b65..4de1b25e0 100644 --- a/src/k_smallest.rs +++ b/src/k_smallest.rs @@ -87,9 +87,9 @@ where } #[inline] -pub(crate) fn key_to_cmp(key: F) -> impl Fn(&T, &T) -> Ordering +pub(crate) fn key_to_cmp(mut key: F) -> impl FnMut(&T, &T) -> Ordering where - F: Fn(&T) -> K, + F: FnMut(&T) -> K, K: Ord, { move |a, b| key(a).cmp(&key(b)) diff --git a/src/lib.rs b/src/lib.rs index 3b13e6a37..64f2b5ac4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3006,7 +3006,7 @@ pub trait Itertools: Iterator { fn k_smallest_by(self, k: usize, cmp: F) -> VecIntoIter where Self: Sized, - F: Fn(&Self::Item, &Self::Item) -> Ordering, + F: FnMut(&Self::Item, &Self::Item) -> Ordering, { k_smallest::k_smallest_general(self, k, cmp).into_iter() } @@ -3038,7 +3038,7 @@ pub trait Itertools: Iterator { fn k_smallest_by_key(self, k: usize, key: F) -> VecIntoIter where Self: Sized, - F: Fn(&Self::Item) -> K, + F: FnMut(&Self::Item) -> K, K: Ord, { self.k_smallest_by(k, k_smallest::key_to_cmp(key)) @@ -3096,10 +3096,10 @@ pub trait Itertools: Iterator { /// itertools::assert_equal(five_largest, vec![13, 6, 12, 5, 11]); /// ``` #[cfg(feature = "use_alloc")] - fn k_largest_by(self, k: usize, cmp: F) -> VecIntoIter + fn k_largest_by(self, k: usize, mut cmp: F) -> VecIntoIter where Self: Sized, - F: Fn(&Self::Item, &Self::Item) -> Ordering, + F: FnMut(&Self::Item, &Self::Item) -> Ordering, { self.k_smallest_by(k, move |a, b| cmp(b, a)) } @@ -3128,7 +3128,7 @@ pub trait Itertools: Iterator { fn k_largest_by_key(self, k: usize, key: F) -> VecIntoIter where Self: Sized, - F: Fn(&Self::Item) -> K, + F: FnMut(&Self::Item) -> K, K: Ord, { self.k_largest_by(k, k_smallest::key_to_cmp(key)) From 6242146e85a80bd18a10a4f883c8108e1752d52f Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:21:50 +0100 Subject: [PATCH 3/4] `k_smallest_range` test: no `izip` `izip` could truncate an iterator without warning us while `assert_equal` would. --- tests/test_std.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/tests/test_std.rs b/tests/test_std.rs index 412986dd0..d02f252d0 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -508,34 +508,24 @@ qc::quickcheck! { let num_elements = min(k, m as _); // Compute the top and bottom k in various combinations + let sorted_smallest = sorted[..num_elements].iter().cloned(); let smallest = v.iter().cloned().k_smallest(k); let smallest_by = v.iter().cloned().k_smallest_by(k, Ord::cmp); let smallest_by_key = v.iter().cloned().k_smallest_by_key(k, |&x| x); + let sorted_largest = sorted[sorted.len() - num_elements..].iter().rev().cloned(); let largest = v.iter().cloned().k_largest(k); let largest_by = v.iter().cloned().k_largest_by(k, Ord::cmp); let largest_by_key = v.iter().cloned().k_largest_by_key(k, |&x| x); // Check the variations produce the same answers and that they're right - for (a,b,c,d) in izip!( - sorted[..num_elements].iter().cloned(), - smallest, - smallest_by, - smallest_by_key) { - assert_eq!(a,b); - assert_eq!(a,c); - assert_eq!(a,d); - } + it::assert_equal(smallest, sorted_smallest.clone()); + it::assert_equal(smallest_by, sorted_smallest.clone()); + it::assert_equal(smallest_by_key, sorted_smallest); - for (a,b,c,d) in izip!( - sorted[sorted.len()-num_elements..].iter().rev().cloned(), - largest, - largest_by, - largest_by_key) { - assert_eq!(a,b); - assert_eq!(a,c); - assert_eq!(a,d); - } + it::assert_equal(largest, sorted_largest.clone()); + it::assert_equal(largest_by, sorted_largest.clone()); + it::assert_equal(largest_by_key, sorted_largest); } } From d6ea87a2f173b4a2b972e4f37956481d3dee8ec0 Mon Sep 17 00:00:00 2001 From: Philippe-Cholet <44676486+Philippe-Cholet@users.noreply.github.com> Date: Tue, 27 Feb 2024 10:46:57 +0100 Subject: [PATCH 4/4] Add `generic_test` for `k_smallest_by` --- tests/test_std.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_std.rs b/tests/test_std.rs index d02f252d0..50110533d 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -575,6 +575,17 @@ where it::assert_equal(i.k_smallest(k), j.sorted().take(k)) } +// Similar to `k_smallest_sort` but for our custom heap implementation. +fn k_smallest_by_sort(i: I, k: u16) +where + I: Iterator + Clone, + I::Item: Ord + Debug, +{ + let j = i.clone(); + let k = k as usize; + it::assert_equal(i.k_smallest_by(k, Ord::cmp), j.sorted().take(k)) +} + macro_rules! generic_test { ($f:ident, $($t:ty),+) => { $(paste::item! { @@ -588,6 +599,7 @@ macro_rules! generic_test { } generic_test!(k_smallest_sort, u8, u16, u32, u64, i8, i16, i32, i64); +generic_test!(k_smallest_by_sort, u8, u16, u32, u64, i8, i16, i32, i64); #[test] fn sorted_by_key() {