@@ -2732,19 +2732,40 @@ pub trait Itertools: Iterator {
     /// itertools::assert_equal(five_smallest, 0..5);
     /// ```
     #[cfg(feature = "use_alloc")]
-    fn k_smallest(self, k: usize) -> VecIntoIter<Self::Item>
+    fn k_smallest(mut self, k: usize) -> VecIntoIter<Self::Item>
     where
         Self: Sized,
         Self::Item: Ord,
     {
-        crate::k_smallest::k_smallest_general(self, k, Self::Item::cmp)
+        // The stdlib heap has optimised handling of "holes", which is not included in our heap implementation in k_smallest_general.
+        // While the difference is unlikely to have practical impact unless `T` is very large, this method uses the stdlib structure
+        // to maintain performance compared to previous versions of the crate.
+        use alloc::collections::BinaryHeap;
+
+        if k == 0 {
+            return vec![].into_iter();
+        }
+
+        let mut heap = self.by_ref().take(k).collect::<BinaryHeap<_>>();
+
+        self.for_each(|i| {
+            debug_assert_eq!(heap.len(), k);
+            // Equivalent to heap.push(min(i, heap.pop())) but more efficient.
+            // This should be done with a single `.peek_mut().unwrap()` but
+            // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior.
+            if *heap.peek().unwrap() > i {
+                *heap.peek_mut().unwrap() = i;
+            }
+        });
+
+        heap.into_sorted_vec().into_iter()
     }

     /// Sort the k smallest elements into a new iterator using the provided comparison.
     ///
     /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
     /// [Itertools::k_smallest] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
-    /// Particularly, the comparison is not cloned.
+    /// Particularly, a custom heap implementation ensures the comparison is not cloned.
     #[cfg(feature = "use_alloc")]
     fn k_smallest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
     where
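
The replacement body in the hunk above keeps a bounded max-heap of `k` elements, seeded from the first `k` items, and swaps out the root whenever a smaller item arrives. As a rough, hedged illustration of that technique (not part of the commit: the function name `k_smallest_sketch` is invented, and it uses `peek_mut` directly rather than the `peek`-then-`peek_mut` workaround for `PeekMut`'s behaviour on Rust 1.46.0 and prior):

use std::collections::BinaryHeap;

// Hypothetical standalone sketch of the bounded max-heap technique used above.
fn k_smallest_sketch<T: Ord>(mut iter: impl Iterator<Item = T>, k: usize) -> Vec<T> {
    if k == 0 {
        return Vec::new();
    }
    // Seed the heap with the first k elements; the largest of them sits at the root.
    let mut heap: BinaryHeap<T> = iter.by_ref().take(k).collect();
    for item in iter {
        // Replace the current maximum whenever a strictly smaller element arrives,
        // so the heap always holds the k smallest elements seen so far.
        if let Some(mut root) = heap.peek_mut() {
            if *root > item {
                *root = item;
            }
        }
    }
    // Drain the heap in ascending order.
    heap.into_sorted_vec()
}

fn main() {
    assert_eq!(k_smallest_sketch((0..15).rev(), 5), vec![0, 1, 2, 3, 4]);
}

Seeding with the first `k` elements and only ever replacing the root keeps memory bounded at `k` items and each update at O(log k).
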
@@ -2766,11 +2787,13 @@ pub trait Itertools: Iterator {
         F: Fn(&Self::Item) -> K,
         K: Ord,
     {
-        self.k_smallest_by(k, |a, b| key(&a).cmp(&key(&b)))
+        self.k_smallest_by(k, |a, b| key(a).cmp(&key(b)))
     }

     /// Sort the k largest elements into a new iterator, in descending order.
-    /// Functionally equivalent to `k_smallest` with a reversed `Ord`
+    /// Semantically equivalent to `k_smallest` with a reversed `Ord`.
+    /// However, this is implemented by way of a custom binary heap,
+    /// which does not have the same performance characteristics for very large `T`.
     /// ```
     /// use itertools::Itertools;
     ///
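
For the caller-facing behaviour these docs describe, here is a small hedged usage sketch (assuming the `k_smallest_by_key` and `k_largest` adaptors as defined in the tree being patched; the asserted values follow from the documented `sorted_by_key(..).take(k)` and descending-order semantics):

use itertools::Itertools;

fn main() {
    // `k_smallest_by_key(k, f)` behaves like `sorted_by_key(f).take(k)`:
    // the two shortest words, in ascending order of length.
    let words = ["banana", "fig", "strawberry", "kiwi"];
    let two_shortest: Vec<&str> = words.iter().copied().k_smallest_by_key(2, |w| w.len()).collect();
    assert_eq!(two_shortest, ["fig", "kiwi"]);

    // `k_largest(k)` yields the k largest elements, in descending order.
    let two_largest: Vec<i32> = (0..10).k_largest(2).collect();
    assert_eq!(two_largest, [9, 8]);
}
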
@@ -2793,7 +2816,7 @@ pub trait Itertools: Iterator {
     }

     /// Sort the k largest elements into a new iterator using the provided comparison.
-    /// Functionally equivalent to `k_smallest` with a reversed `Ord`
+    /// Functionally equivalent to `k_smallest_by` with a reversed `Ord`.
     #[cfg(feature = "use_alloc")]
     fn k_largest_by<F>(self, k: usize, cmp: F) -> VecIntoIter<Self::Item>
     where
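
And a hedged sketch of `k_largest_by` with an explicit comparator, mirroring `k_smallest_by`; the magnitude-based ordering here is an invented example, not taken from the source:

use itertools::Itertools;

fn main() {
    // The two elements with the largest absolute value, returned in
    // descending order with respect to the comparator.
    let largest_by_magnitude: Vec<i32> = [-7, 2, 5, -1]
        .iter()
        .copied()
        .k_largest_by(2, |a, b| a.abs().cmp(&b.abs()))
        .collect();
    assert_eq!(largest_by_magnitude, [-7, 5]);
}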