@@ -19,10 +19,12 @@ use core::cmp::Ordering::{self, Less};
 use core::mem::{self, SizedTypeProperties};
 #[cfg(not(no_global_oom_handling))]
 use core::ptr;
+#[cfg(not(no_global_oom_handling))]
+use core::slice::sort;

 use crate::alloc::Allocator;
 #[cfg(not(no_global_oom_handling))]
-use crate::alloc::Global;
+use crate::alloc::{self, Global};
 #[cfg(not(no_global_oom_handling))]
 use crate::borrow::ToOwned;
 use crate::boxed::Box;
@@ -203,7 +205,7 @@ impl<T> [T] {
     where
         T: Ord,
     {
-        merge_sort(self, T::lt);
+        stable_sort(self, T::lt);
     }

     /// Sorts the slice with a comparator function.
@@ -259,7 +261,7 @@ impl<T> [T] {
     where
         F: FnMut(&T, &T) -> Ordering,
     {
-        merge_sort(self, |a, b| compare(a, b) == Less);
+        stable_sort(self, |a, b| compare(a, b) == Less);
     }

     /// Sorts the slice with a key extraction function.
@@ -302,7 +304,7 @@ impl<T> [T] {
         F: FnMut(&T) -> K,
         K: Ord,
     {
-        merge_sort(self, |a, b| f(a).lt(&f(b)));
+        stable_sort(self, |a, b| f(a).lt(&f(b)));
     }

     /// Sorts the slice with a key extraction function.
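None of the public sorting APIs change in these hunks: `sort`, `sort_by`, and `sort_by_key` keep their signatures and stability guarantee, and only the internal routine they forward to is renamed from `merge_sort` to `stable_sort`. For illustration, the observable behavior stays exactly as before:

```rust
fn main() {
    let mut v = [5, 4, 1, 3, 2];

    v.sort(); // now delegates to stable_sort(self, T::lt)
    assert_eq!(v, [1, 2, 3, 4, 5]);

    v.sort_by(|a, b| b.cmp(a)); // comparator form
    assert_eq!(v, [5, 4, 3, 2, 1]);

    v.sort_by_key(|x| *x); // key-extraction form
    assert_eq!(v, [1, 2, 3, 4, 5]);
}
```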
@@ -809,324 +811,52 @@ impl<T: Clone> ToOwned for [T] {
 // Sorting
 ////////////////////////////////////////////////////////////////////////////////

-/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
-///
-/// This is the integral subroutine of insertion sort.
-#[cfg(not(no_global_oom_handling))]
-fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    if v.len() >= 2 && is_less(&v[1], &v[0]) {
-        unsafe {
-            // There are three ways to implement insertion here:
-            //
-            // 1. Swap adjacent elements until the first one gets to its final destination.
-            //    However, this way we copy data around more than is necessary. If elements are big
-            //    structures (costly to copy), this method will be slow.
-            //
-            // 2. Iterate until the right place for the first element is found. Then shift the
-            //    elements succeeding it to make room for it and finally place it into the
-            //    remaining hole. This is a good method.
-            //
-            // 3. Copy the first element into a temporary variable. Iterate until the right place
-            //    for it is found. As we go along, copy every traversed element into the slot
-            //    preceding it. Finally, copy data from the temporary variable into the remaining
-            //    hole. This method is very good. Benchmarks demonstrated slightly better
-            //    performance than with the 2nd method.
-            //
-            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
-            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));
-
-            // Intermediate state of the insertion process is always tracked by `hole`, which
-            // serves two purposes:
-            // 1. Protects integrity of `v` from panics in `is_less`.
-            // 2. Fills the remaining hole in `v` in the end.
-            //
-            // Panic safety:
-            //
-            // If `is_less` panics at any point during the process, `hole` will get dropped and
-            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
-            // initially held exactly once.
-            let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
-            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
-
-            for i in 2..v.len() {
-                if !is_less(&v[i], &*tmp) {
-                    break;
-                }
-                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
-                hole.dest = &mut v[i];
-            }
-            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
-        }
-    }
-
-    // When dropped, copies from `src` into `dest`.
-    struct InsertionHole<T> {
-        src: *const T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for InsertionHole<T> {
-        fn drop(&mut self) {
-            unsafe {
-                ptr::copy_nonoverlapping(self.src, self.dest, 1);
-            }
-        }
-    }
-}
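The deleted `insert_head` implements option 3 from its own comment. A safe, simplified sketch of the same idea (hypothetical helper, with a `Copy` bound added so no drop guard is needed; the deleted code handles arbitrary `T` with raw pointers plus the `InsertionHole` guard for panic safety):

```rust
fn insert_head_simplified<T: Copy + Ord>(v: &mut [T]) {
    if v.len() >= 2 && v[1] < v[0] {
        let tmp = v[0]; // stash the first element; the "hole" starts at index 0
        let mut hole = 0;
        // Shift every smaller successor one slot to the left...
        while hole + 1 < v.len() && v[hole + 1] < tmp {
            v[hole] = v[hole + 1];
            hole += 1;
        }
        // ...then drop the stashed element into the remaining hole.
        v[hole] = tmp;
    }
}
```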
-
-/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
-/// stores the result into `v[..]`.
-///
-/// # Safety
-///
-/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
-/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
-#[cfg(not(no_global_oom_handling))]
-unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
-where
-    F: FnMut(&T, &T) -> bool,
-{
-    let len = v.len();
-    let v = v.as_mut_ptr();
-    let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };
-
-    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
-    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
-    // copying the lesser (or greater) one into `v`.
-    //
-    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
-    // consumed first, then we must copy whatever is left of the shorter run into the remaining
-    // hole in `v`.
-    //
-    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
-    // 1. Protects integrity of `v` from panics in `is_less`.
-    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
-    //
-    // Panic safety:
-    //
-    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
-    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
-    // object it initially held exactly once.
-    let mut hole;
-
-    if mid <= len - mid {
-        // The left run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v, buf, mid);
-            hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
-        }
-
-        // Initially, these pointers point to the beginnings of their arrays.
-        let left = &mut hole.start;
-        let mut right = v_mid;
-        let out = &mut hole.dest;
-
-        while *left < hole.end && right < v_end {
-            // Consume the lesser side.
-            // If equal, prefer the left run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right, &**left) {
-                    get_and_increment(&mut right)
-                } else {
-                    get_and_increment(left)
-                };
-                ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
-            }
-        }
-    } else {
-        // The right run is shorter.
-        unsafe {
-            ptr::copy_nonoverlapping(v_mid, buf, len - mid);
-            hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
-        }
-
-        // Initially, these pointers point past the ends of their arrays.
-        let left = &mut hole.dest;
-        let right = &mut hole.end;
-        let mut out = v_end;
-
-        while v < *left && buf < *right {
-            // Consume the greater side.
-            // If equal, prefer the right run to maintain stability.
-            unsafe {
-                let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
-                    decrement_and_get(left)
-                } else {
-                    decrement_and_get(right)
-                };
-                ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
-            }
-        }
-    }
-    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
-    // it will now be copied into the hole in `v`.
-
-    unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
-        let old = *ptr;
-        *ptr = unsafe { ptr.add(1) };
-        old
-    }
-
-    unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
-        *ptr = unsafe { ptr.sub(1) };
-        *ptr
-    }
-
-    // When dropped, copies the range `start..end` into `dest..`.
-    struct MergeHole<T> {
-        start: *mut T,
-        end: *mut T,
-        dest: *mut T,
-    }
-
-    impl<T> Drop for MergeHole<T> {
-        fn drop(&mut self) {
-            // `T` is not a zero-sized type, and these are pointers into a slice's elements.
-            unsafe {
-                let len = self.end.sub_ptr(self.start);
-                ptr::copy_nonoverlapping(self.start, self.dest, len);
-            }
-        }
-    }
-}
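Functionally, the deleted `merge` produces the same ordering as the textbook out-of-place merge below (a hypothetical sketch; the real code above merges in place, buffers only the shorter run in `buf`, and relies on the `MergeHole` guard to keep `v` intact if `is_less` panics):

```rust
fn merge_simplified<T: Clone + Ord>(v: &[T], mid: usize) -> Vec<T> {
    let (mut i, mut j) = (0, mid);
    let mut out = Vec::with_capacity(v.len());
    while i < mid && j < v.len() {
        // `<=` prefers the left run on ties, which is what makes the merge stable.
        if v[i] <= v[j] {
            out.push(v[i].clone());
            i += 1;
        } else {
            out.push(v[j].clone());
            j += 1;
        }
    }
    // Whichever run remains is already sorted; append it wholesale.
    out.extend_from_slice(&v[i..mid]);
    out.extend_from_slice(&v[j..]);
    out
}
```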
-
-/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
-/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
-///
-/// The algorithm identifies strictly descending and non-descending subsequences, which are called
-/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
-/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
-/// satisfied:
-///
-/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
-/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
-///
-/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
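To make the two invariants from the deleted doc comment concrete, here is a hypothetical checker over a stack of pending run lengths (not part of the diff):

```rust
fn invariants_hold(run_lens: &[usize]) -> bool {
    // 1. every run is longer than the one after it
    let inv1 = run_lens.windows(2).all(|w| w[0] > w[1]);
    // 2. every run is longer than the next two combined
    let inv2 = run_lens.windows(3).all(|w| w[0] > w[1] + w[2]);
    inv1 && inv2
}

fn main() {
    // [50, 28, 17, 10] satisfies both: 50 > 28 + 17 and 28 > 17 + 10.
    assert!(invariants_hold(&[50, 28, 17, 10]));
    // Pushing a run of length 12 breaks invariant 1 (10 > 12 is false),
    // so adjacent runs would be merged before scanning continues.
    assert!(!invariants_hold(&[50, 28, 17, 10, 12]));
}
```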
+#[inline]
 #[cfg(not(no_global_oom_handling))]
-fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
+fn stable_sort<T, F>(v: &mut [T], mut is_less: F)
 where
     F: FnMut(&T, &T) -> bool,
 {
-    // Slices of up to this length get sorted using insertion sort.
-    const MAX_INSERTION: usize = 20;
-    // Very short runs are extended using insertion sort to span at least this many elements.
-    const MIN_RUN: usize = 10;
-
-    // Sorting has no meaningful behavior on zero-sized types.
     if T::IS_ZST {
+        // Sorting has no meaningful behavior on zero-sized types. Do nothing.
         return;
     }

-    let len = v.len();
-
-    // Short arrays get sorted in-place via insertion sort to avoid allocations.
-    if len <= MAX_INSERTION {
-        if len >= 2 {
-            for i in (0..len - 1).rev() {
-                insert_head(&mut v[i..], &mut is_less);
-            }
-        }
-        return;
-    }
-
-    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
-    // shallow copies of the contents of `v` without risking the dtors running on copies if
-    // `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
-    // which will always have length at most `len / 2`.
-    let mut buf = Vec::with_capacity(len / 2);
+    let elem_alloc_fn = |len: usize| -> *mut T {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap
+        // elements.
+        unsafe { alloc::alloc(alloc::Layout::array::<T>(len).unwrap_unchecked()) as *mut T }
+    };

-    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
-    // strange decision, but consider the fact that merges more often go in the opposite direction
-    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
-    // backwards. To conclude, identifying runs by traversing backwards improves performance.
-    let mut runs = vec![];
-    let mut end = len;
-    while end > 0 {
-        // Find the next natural run, and reverse it if it's strictly descending.
-        let mut start = end - 1;
-        if start > 0 {
-            start -= 1;
-            unsafe {
-                if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
-                    while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
-                        start -= 1;
-                    }
-                    v[start..end].reverse();
-                } else {
-                    while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
-                    {
-                        start -= 1;
-                    }
-                }
-            }
-        }
-
-        // Insert some more elements into the run if it's too short. Insertion sort is faster than
-        // merge sort on short sequences, so this significantly improves performance.
-        while start > 0 && end - start < MIN_RUN {
-            start -= 1;
-            insert_head(&mut v[start..end], &mut is_less);
+    let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
+        // v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::<T>(len).unwrap_unchecked());
         }
+    };

-        // Push this run onto the stack.
-        runs.push(Run { start, len: end - start });
-        end = start;
-
-        // Merge some pairs of adjacent runs to satisfy the invariants.
-        while let Some(r) = collapse(&runs) {
-            let left = runs[r + 1];
-            let right = runs[r];
-            unsafe {
-                merge(
-                    &mut v[left.start..right.start + right.len],
-                    left.len,
-                    buf.as_mut_ptr(),
-                    &mut is_less,
-                );
-            }
-            runs[r] = Run { start: left.start, len: left.len + right.len };
-            runs.remove(r + 1);
+    let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun {
+        // SAFETY: Creating the layout is safe as long as merge_sort never calls this with an
+        // obscene length or 0.
+        unsafe {
+            alloc::alloc(alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked())
+                as *mut sort::TimSortRun
         }
-    }
-
-    // Finally, exactly one run must remain in the stack.
-    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
+    };

-    // Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
-    // if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
-    // algorithm should continue building a new run instead, `None` is returned.
-    //
-    // TimSort is infamous for its buggy implementations, as described here:
-    // http://envisage-project.eu/timsort-specification-and-verification/
-    //
-    // The gist of the story is: we must enforce the invariants on the top four runs on the stack.
-    // Enforcing them on just top three is not sufficient to ensure that the invariants will still
-    // hold for *all* runs in the stack.
-    //
-    // This function correctly checks invariants for the top four runs. Additionally, if the top
-    // run starts at index 0, it will always demand a merge operation until the stack is fully
-    // collapsed, in order to complete the sort.
-    #[inline]
-    fn collapse(runs: &[Run]) -> Option<usize> {
-        let n = runs.len();
-        if n >= 2
-            && (runs[n - 1].start == 0
-                || runs[n - 2].len <= runs[n - 1].len
-                || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
-                || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
-        {
-            if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
-        } else {
-            None
+    let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| {
+        // SAFETY: The caller must ensure that buf_ptr was created by run_alloc_fn with the same
+        // len.
+        unsafe {
+            alloc::dealloc(
+                buf_ptr as *mut u8,
+                alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked(),
+            );
         }
-    }
+    };

-    #[derive(Clone, Copy)]
-    struct Run {
-        start: usize,
-        len: usize,
-    }
+    sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn);
 }
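The shape of the new `stable_sort` reflects that the sort implementation now lives in `core`, which cannot allocate; `alloc` therefore injects allocation and deallocation as closures. A minimal standalone sketch of that dependency-injection pattern (hypothetical names, not the actual `core::slice::sort` API):

```rust
use std::alloc::{alloc, dealloc, Layout};

// Allocation-free "core" logic that receives its scratch memory via hooks,
// mirroring how stable_sort hands elem_alloc_fn/elem_dealloc_fn to sort::merge_sort.
fn core_logic(len: usize, alloc_fn: impl Fn(usize) -> *mut u8, dealloc_fn: impl Fn(*mut u8, usize)) {
    let buf = alloc_fn(len);
    // ... the algorithm would use `buf` as temporary storage here ...
    dealloc_fn(buf, len);
}

fn main() {
    let alloc_fn = |len: usize| {
        // SAFETY: len is non-zero below, so the layout has non-zero size.
        unsafe { alloc(Layout::array::<u8>(len).unwrap()) }
    };
    let dealloc_fn = |ptr: *mut u8, len: usize| unsafe {
        dealloc(ptr, Layout::array::<u8>(len).unwrap());
    };
    core_logic(64, alloc_fn, dealloc_fn);
}
```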