11use alloc:: vec:: IntoIter ;
22use core:: cmp:: { Ord , Ordering , Reverse } ;
3- use core:: mem:: { replace, transmute, MaybeUninit } ;
4- use core:: ops:: Range ;
3+ use core:: mem:: replace;
54
6- fn k_smallest_dynamic < T , I : Iterator < Item = T > > (
5+ /// Consumes a given iterator, leaving the minimum elements in the provided storage in **ascending** order.
6+ fn k_smallest_general < T , I : Iterator < Item = T > > (
77 iter : I ,
88 k : usize ,
99 order : impl Fn ( & T , & T ) -> Ordering ,
1010) -> IntoIter < T > {
11- let mut storage = Vec :: new ( ) ;
12- storage. resize_with ( k, MaybeUninit :: uninit) ;
13- let Range { end, .. } = capped_heapsort ( iter, & mut storage, order) ;
14- storage. truncate ( end) ;
15- let initialized: Vec < _ > = unsafe { transmute ( storage) } ;
16- initialized. into_iter ( )
11+ if k == 0 {
12+ return Vec :: new ( ) . into_iter ( ) ;
13+ }
14+ let heap = MaxHeap :: from_iter ( k, order, iter) ;
15+ heap. unwrap_sorted ( ) . into_iter ( )
1716}
1817
1918pub ( crate ) fn reverse_cmp < T , F > ( cmp : F ) -> impl Fn ( & T , & T ) -> Ordering
@@ -35,15 +34,15 @@ where
3534 T : Ord ,
3635 I : Iterator < Item = T > ,
3736{
38- k_smallest_dynamic ( iter, k, T :: cmp)
37+ k_smallest_general ( iter, k, T :: cmp)
3938}
4039
4140pub ( crate ) fn k_smallest_by < T , I , F > ( iter : I , k : usize , cmp : F ) -> IntoIter < T >
4241where
4342 I : Iterator < Item = T > ,
4443 F : Fn ( & T , & T ) -> Ordering ,
4544{
46- k_smallest_dynamic ( iter, k, cmp)
45+ k_smallest_general ( iter, k, cmp)
4746}
4847
4948pub ( crate ) fn k_smallest_by_key < T , I , F , K > ( iter : I , k : usize , key : F ) -> IntoIter < T >
@@ -54,75 +53,54 @@ where
5453{
5554 let iter = iter. map ( |v| Pair ( key ( & v) , v) ) ;
5655
57- let results: Vec < _ > = k_smallest_dynamic ( iter, k, Ord :: cmp)
56+ let results: Vec < _ > = k_smallest_general ( iter, k, Ord :: cmp)
5857 . map ( |Pair ( _, t) | t)
5958 . collect ( ) ;
6059 results. into_iter ( )
6160}
6261
63- /// Consumes a given iterator, leaving the minimum elements in the provided storage in **ascending** order.
64- /// Returns the range of initialized elements
65- fn capped_heapsort < T , I : Iterator < Item = T > > (
66- iter : I ,
67- storage : & mut [ MaybeUninit < T > ] ,
68- order : impl Fn ( & T , & T ) -> Ordering ,
69- ) -> Range < usize > {
70- if storage. is_empty ( ) {
71- return 0 ..0 ;
72- }
73- let mut heap = MaxHeap :: from_iter ( storage, order, iter) ;
74-
75- let valid_elements = 0 ..heap. len ;
76- while heap. len > 0 {
77- heap. pop ( ) ;
78- }
79- valid_elements
80- }
81-
8262/// An efficient heapsort requires that the heap ordering is the inverse of the desired sort order
8363/// So the basic case of retrieving minimum elements requires a max heap
8464///
8565/// This type does not attempt to reproduce all the functionality of [std::collections::BinaryHeap] and instead only implements what is needed for iter operations,
8666/// e.g. we do not need to insert single elements.
8767/// Additionally, some minor optimizations used in the std BinaryHeap are not used here, e.g. elements are actually swapped rather than managing a "hole"
88- ///
89- /// To be generic over the underlying storage, it takes a mutable reference to avoid having to define a storage trait.
90- struct MaxHeap < ' a , T , C > {
68+ struct MaxHeap < T , C > {
9169 // It may be not be possible to shrink the storage for smaller sequencess
9270 // so manually manage the initialization
9371 // This is **assumed not to be empty**
94- storage : & ' a mut [ MaybeUninit < T > ] ,
72+ storage : alloc :: vec :: Vec < T > ,
9573 comparator : C ,
96- // SAFETY: this must always be less or equal to the count of actually initialized elements
74+ // this is always less or equal to the count of actual elements
75+ // allowing it to be less means the heap property can cover only a subset of the vec
76+ // while reusing the storage
9777 len : usize ,
9878}
9979
100- impl < ' a , T , C > MaxHeap < ' a , T , C >
80+ impl < T , C > MaxHeap < T , C >
10181where
10282 C : Fn ( & T , & T ) -> Ordering ,
10383{
104- fn from_iter < I > ( storage : & ' a mut [ MaybeUninit < T > ] , comparator : C , mut iter : I ) -> Self
84+ fn from_iter < I > ( k : usize , comparator : C , mut iter : I ) -> Self
10585 where
10686 I : Iterator < Item = T > ,
10787 {
88+ let storage: Vec < T > = iter. by_ref ( ) . take ( k) . collect ( ) ;
89+
10890 let mut heap = Self {
91+ len : storage. len ( ) ,
10992 storage,
11093 comparator,
111- len : 0 ,
11294 } ;
113- for ( i, initial_item) in iter. by_ref ( ) . take ( heap. storage . len ( ) ) . enumerate ( ) {
114- heap. storage [ i] = MaybeUninit :: new ( initial_item) ;
115- heap. len += 1 ;
116- }
11795 // Filling up the storage and only afterwards rearranging to form a valid heap is slightly more efficient
11896 // (But only by a factor of lg(k) and I'd love to hear of a usecase where that matters)
11997 heap. heapify ( ) ;
12098
121- if heap . len == heap. storage . len ( ) {
99+ if k == heap. storage . len ( ) {
122100 // Nothing else needs done if we didn't fill the storage in the first place
123101 // Also avoids unexpected behaviour with restartable iterators
124102 for val in iter {
125- let _ = heap. push_pop ( val) ;
103+ heap. push_pop ( val) ;
126104 }
127105 }
128106 heap
@@ -133,11 +111,7 @@ where
133111 /// element ordering.
134112 fn get ( & self , index : usize ) -> Option < & T > {
135113 if index < self . len {
136- let ptr = unsafe {
137- // There might be a better way to do this but assume_init_ref doesn't exist on MSRV
138- self . storage [ index] . as_ptr ( ) . as_ref ( ) . unwrap ( )
139- } ;
140- Some ( ptr)
114+ self . storage . get ( index)
141115 } else {
142116 None
143117 }
@@ -168,7 +142,6 @@ where
168142 let ( original_item, replacement_item) = ( self . get ( origin) , self . get ( replacement_idx) ) ;
169143
170144 let cmp = self . compare ( original_item, replacement_item) ;
171- // If the left item also doesn't exist, this comparison will fall through
172145 if Some ( Ordering :: Less ) == cmp {
173146 self . storage . swap ( origin, replacement_idx) ;
174147 self . sift_down ( replacement_idx) ;
@@ -188,17 +161,13 @@ where
188161
189162 /// Insert the given element into the heap without changing its size
190163 /// The displaced element is returned, i.e. either the input or previous max
191- fn push_pop ( & mut self , val : T ) -> Option < T > {
164+ fn push_pop ( & mut self , val : T ) -> T {
192165 if self . compare ( self . get ( 0 ) , Some ( & val) ) == Some ( Ordering :: Greater ) {
193- let out = replace ( & mut self . storage [ 0 ] , MaybeUninit :: new ( val) ) ;
166+ let out = replace ( & mut self . storage [ 0 ] , val) ;
194167 self . sift_down ( 0 ) ;
195- // SAFETY: This has been moved out of storage[0]
196- // storage[0] will be uninitialized if and only if self.len == 0
197- // In that case, self.get(0) above will return None, and the comparison will fall through to None
198- // So to get here, self.len > 0 and therefore this element was initialized
199- unsafe { Some ( out. assume_init ( ) ) }
168+ out
200169 } else {
201- Some ( val)
170+ val
202171 }
203172 }
204173
@@ -213,6 +182,14 @@ where
213182 fn compare ( & self , a : Option < & T > , b : Option < & T > ) -> Option < Ordering > {
214183 ( self . comparator ) ( a?, b?) . into ( )
215184 }
185+
186+ // Totally orders the elements and returns the raw storage
187+ fn unwrap_sorted ( mut self ) -> Vec < T > {
188+ while self . len > 1 {
189+ self . pop ( ) ;
190+ }
191+ self . storage
192+ }
216193}
217194
218195struct Pair < K , T > ( K , T ) ;
0 commit comments