@@ -2950,14 +2950,108 @@ pub trait Itertools: Iterator {
29502950 /// itertools::assert_equal(five_smallest, 0..5);
29512951 /// ```
29522952 #[ cfg( feature = "use_alloc" ) ]
2953- fn k_smallest ( self , k : usize ) -> VecIntoIter < Self :: Item >
2953+ fn k_smallest ( mut self , k : usize ) -> VecIntoIter < Self :: Item >
29542954 where
29552955 Self : Sized ,
29562956 Self :: Item : Ord ,
29572957 {
2958- crate :: k_smallest:: k_smallest ( self , k)
2959- . into_sorted_vec ( )
2960- . into_iter ( )
2958+ // The stdlib heap has optimised handling of "holes", which is not included in our heap implementation in k_smallest_general.
2959+ // While the difference is unlikely to have practical impact unless `Self::Item` is very large, this method uses the stdlib structure
2960+ // to maintain performance compared to previous versions of the crate.
2961+ use alloc:: collections:: BinaryHeap ;
2962+ // Nothing is requested: return an empty iterator without building a heap (the source iterator is dropped without being advanced).
2963+ if k == 0 {
2964+ return Vec :: new ( ) . into_iter ( ) ;
2965+ }
2966+ // Seed a max-heap with the first `k` items; `by_ref` keeps `self` usable so the remaining items can be streamed below.
2967+ let mut heap = self . by_ref ( ) . take ( k) . collect :: < BinaryHeap < _ > > ( ) ;
2968+ // For every remaining item, overwrite the heap's current maximum whenever the new item is strictly smaller.
2969+ self . for_each ( |i| {
2970+ debug_assert_eq ! ( heap. len( ) , k) ;
2971+ // Equivalent to heap.push(min(i, heap.pop())) but more efficient.
2972+ // This should be done with a single `.peek_mut().unwrap()` but
2973+ // `PeekMut` sifts-down unconditionally on Rust 1.46.0 and prior.
2974+ if * heap. peek ( ) . unwrap ( ) > i {
2975+ * heap. peek_mut ( ) . unwrap ( ) = i;
2976+ }
2977+ } ) ;
2978+ // `into_sorted_vec` sorts ascending, so the returned iterator runs from the smallest kept item to the largest.
2979+ heap. into_sorted_vec ( ) . into_iter ( )
2980+ }
2981+
2982+ /// Sort the k smallest elements into a new iterator using the provided comparison.
2983+ ///
2984+ /// This corresponds to `self.sorted_by(cmp).take(k)` in the same way that
2985+ /// [`Itertools::k_smallest`] corresponds to `self.sorted().take(k)`, in both semantics and complexity.
2986+ /// In particular, a custom heap implementation ensures the comparison is not cloned.
2987+ #[ cfg( feature = "use_alloc" ) ]
2988+ fn k_smallest_by < F > ( self , k : usize , cmp : F ) -> VecIntoIter < Self :: Item >
2989+ where
2990+ Self : Sized ,
2991+ F : Fn ( & Self :: Item , & Self :: Item ) -> Ordering ,
2992+ {
2993+ k_smallest:: k_smallest_general ( self , k, cmp) . into_iter ( ) // all selection work is delegated to the crate's comparator-driven helper
2994+ }
2995+
2996+ /// Return the elements producing the k smallest outputs of the provided function.
2997+ ///
2998+ /// This corresponds to `self.sorted_by_key(key).take(k)` in the same way that
2999+ /// [`Itertools::k_smallest`] corresponds to `self.sorted().take(k)`, in both semantics and time complexity.
3000+ #[ cfg( feature = "use_alloc" ) ]
3001+ fn k_smallest_by_key < F , K > ( self , k : usize , key : F ) -> VecIntoIter < Self :: Item >
3002+ where
3003+ Self : Sized ,
3004+ F : Fn ( & Self :: Item ) -> K ,
3005+ K : Ord ,
3006+ {
3007+ self . k_smallest_by ( k, k_smallest:: key_to_cmp ( key) ) // `key_to_cmp` adapts the key function into the comparison `k_smallest_by` expects
3008+ }
3009+
3010+ /// Sort the k largest elements into a new iterator, in descending order.
3011+ ///
3012+ /// Semantically equivalent to [`Itertools::k_smallest`] with a reversed `Ord`; however, this is implemented
3013+ /// by way of a custom binary heap, which does not have the same performance characteristics for very large `Self::Item`.
3014+ /// ```
3015+ /// use itertools::Itertools;
3016+ ///
3017+ /// // A random permutation of 0..15
3018+ /// let numbers = vec![6, 9, 1, 14, 0, 4, 8, 7, 11, 2, 10, 3, 13, 12, 5];
3019+ ///
3020+ /// let five_largest = numbers
3021+ /// .into_iter()
3022+ /// .k_largest(5);
3023+ ///
3024+ /// itertools::assert_equal(five_largest, vec![14,13,12,11,10]);
3025+ /// ```
3026+ #[ cfg( feature = "use_alloc" ) ]
3027+ fn k_largest ( self , k : usize ) -> VecIntoIter < Self :: Item >
3028+ where
3029+ Self : Sized ,
3030+ Self :: Item : Ord ,
3031+ {
3032+ self . k_largest_by ( k, Self :: Item :: cmp) // pass the natural ordering; `k_largest_by` is what reverses it
3033+ }
3034+
3035+ /// Sort the k largest elements into a new iterator using the provided comparison.
3036+ /// Functionally equivalent to [`Itertools::k_smallest_by`] with a reversed `Ord`.
3037+ #[ cfg( feature = "use_alloc" ) ]
3038+ fn k_largest_by < F > ( self , k : usize , cmp : F ) -> VecIntoIter < Self :: Item >
3039+ where
3040+ Self : Sized ,
3041+ F : Fn ( & Self :: Item , & Self :: Item ) -> Ordering ,
3042+ {
3043+ self . k_smallest_by ( k, move |a, b| cmp ( b, a) ) // swapping the arguments reverses the ordering, so "smallest" selects the largest
3044+ }
3045+
3046+ /// Return the elements producing the k largest outputs of the provided function; equivalent to calling [`Itertools::k_largest_by`] with the comparison that `k_smallest::key_to_cmp` derives from `key`.
3047+ #[ cfg( feature = "use_alloc" ) ]
3048+ fn k_largest_by_key < F , K > ( self , k : usize , key : F ) -> VecIntoIter < Self :: Item >
3049+ where
3050+ Self : Sized ,
3051+ F : Fn ( & Self :: Item ) -> K ,
3052+ K : Ord ,
3053+ {
3054+ self . k_largest_by ( k, k_smallest:: key_to_cmp ( key) ) // the reversal itself happens inside `k_largest_by`
29613055 }
29623056
29633057 /// Collect all iterator elements into one of two
0 commit comments