@@ -398,6 +398,7 @@ use crate::cmp::Ordering;
398398use crate :: intrinsics:: const_eval_select;
399399use crate :: marker:: FnPtr ;
400400use crate :: mem:: { self , MaybeUninit , SizedTypeProperties } ;
401+ use crate :: num:: NonZero ;
401402use crate :: { fmt, hash, intrinsics, ub_checks} ;
402403
403404mod alignment;
@@ -1094,49 +1095,22 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
10941095 // are pointers inside `T` we will copy them in one go rather than trying to copy a part
10951096 // of a pointer (which would not work).
10961097 // SAFETY: Same preconditions as this function
1097- unsafe { swap_nonoverlapping_simple_untyped ( x, y, count) }
1098+ unsafe { swap_nonoverlapping_const ( x, y, count) }
10981099 } else {
1099- macro_rules! attempt_swap_as_chunks {
1100- ( $ChunkTy : ty) => {
1101- if mem:: align_of:: <T >( ) >= mem:: align_of:: <$ChunkTy >( )
1102- && mem:: size_of:: <T >( ) % mem:: size_of:: <$ChunkTy >( ) == 0
1103- {
1104- let x: * mut $ChunkTy = x. cast( ) ;
1105- let y: * mut $ChunkTy = y. cast( ) ;
1106- let count = count * ( mem:: size_of:: <T >( ) / mem:: size_of:: <$ChunkTy >( ) ) ;
1107- // SAFETY: these are the same bytes that the caller promised were
1108- // ok, just typed as `MaybeUninit<ChunkTy>`s instead of as `T`s.
1109- // The `if` condition above ensures that we're not violating
1110- // alignment requirements, and that the division is exact so
1111- // that we don't lose any bytes off the end.
1112- return unsafe { swap_nonoverlapping_simple_untyped( x, y, count) } ;
1113- }
1114- } ;
1100+ // SAFETY: To exist as a memory range its size in bytes can't overflow.
1101+ let bytes = unsafe { size_of:: <T >( ) . unchecked_mul( count) } ;
1102+ if let Some ( bytes) = NonZero :: new( bytes) {
1103+ // SAFETY: These are the same ranges, just expressed in a different
1104+ // type, so they're still non-overlapping.
1105+ unsafe { swap_nonoverlapping_bytes( x. cast( ) , y. cast( ) , bytes) } ;
11151106 }
1116-
1117- // Split up the slice into small power-of-two-sized chunks that LLVM is able
1118- // to vectorize (unless it's a special type with more-than-pointer alignment,
1119- // because we don't want to pessimize things like slices of SIMD vectors.)
1120- if mem:: align_of:: <T >( ) <= mem:: size_of:: <usize >( )
1121- && ( !mem:: size_of:: <T >( ) . is_power_of_two( )
1122- || mem:: size_of:: <T >( ) > mem:: size_of:: <usize >( ) * 2 )
1123- {
1124- attempt_swap_as_chunks!( usize ) ;
1125- attempt_swap_as_chunks!( u8 ) ;
1126- }
1127-
1128- // SAFETY: Same preconditions as this function
1129- unsafe { swap_nonoverlapping_simple_untyped( x, y, count) }
11301107 }
11311108 )
11321109}
11331110
11341111/// Same behavior and safety conditions as [`swap_nonoverlapping`]
1135- ///
1136- /// LLVM can vectorize this (at least it can for the power-of-two-sized types
1137- /// `swap_nonoverlapping` tries to use) so no need to manually SIMD it.
11381112#[ inline]
1139- const unsafe fn swap_nonoverlapping_simple_untyped < T > ( x : * mut T , y : * mut T , count : usize ) {
1113+ const unsafe fn swap_nonoverlapping_const < T > ( x : * mut T , y : * mut T , count : usize ) {
11401114 let x = x. cast :: < MaybeUninit < T > > ( ) ;
11411115 let y = y. cast :: < MaybeUninit < T > > ( ) ;
11421116 let mut i = 0 ;
@@ -1147,13 +1121,6 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
11471121 // and it's distinct from `x` since the ranges are non-overlapping
11481122 let y = unsafe { y. add ( i) } ;
11491123
1150- // If we end up here, it's because we're using a simple type -- like
1151- // a small power-of-two-sized thing -- or a special type with particularly
1152- // large alignment, particularly SIMD types.
1153- // Thus, we're fine just reading-and-writing it, as either it's small
1154- // and that works well anyway or it's special and the type's author
1155- // presumably wanted things to be done in the larger chunk.
1156-
11571124 // SAFETY: we're only ever given pointers that are valid to read/write,
11581125 // including being aligned, and nothing here panics so it's drop-safe.
11591126 unsafe {
@@ -1167,6 +1134,72 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
11671134 }
11681135}
11691136
1137+ // Don't let MIR inline this, because we really want it to keep its noalias metadata
1138+ #[ rustc_no_mir_inline]
1139+ #[ inline]
1140+ fn swap_chunk < const N : usize > ( x : & mut MaybeUninit < [ u8 ; N ] > , y : & mut MaybeUninit < [ u8 ; N ] > ) {
1141+ let a = * x;
1142+ let b = * y;
1143+ * x = b;
1144+ * y = a;
1145+ }
1146+
1147+ #[ inline]
1148+ unsafe fn swap_nonoverlapping_bytes ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1149+ // Same as `swap_nonoverlapping::<[u8; N]>`.
1150+ #[ inline]
1151+ unsafe fn swap_nonoverlapping_chunks < const N : usize > (
1152+ x : * mut MaybeUninit < [ u8 ; N ] > ,
1153+ y : * mut MaybeUninit < [ u8 ; N ] > ,
1154+ chunks : NonZero < usize > ,
1155+ ) {
1156+ let chunks = chunks. get ( ) ;
1157+ for i in 0 ..chunks {
1158+ // SAFETY: i is in [0, chunks) so the adds and dereferences are in-bounds.
1159+ unsafe { swap_chunk ( & mut * x. add ( i) , & mut * y. add ( i) ) } ;
1160+ }
1161+ }
1162+
1163+ // Same as `swap_nonoverlapping_bytes`, but accepts at most 1+2+4=7 bytes
1164+ #[ inline]
1165+ unsafe fn swap_nonoverlapping_short ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1166+ let bytes = bytes. get ( ) ;
1167+ let mut i = 0 ;
1168+ macro_rules! swap_prefix {
1169+ ( $( $n: literal) +) => { $(
1170+ if ( bytes & $n) != 0 {
1171+ // SAFETY: `i` can only have the same bits set as those in bytes,
1172+ // so these `add`s are in-bounds of `bytes`. But the bit for
1173+ // `$n` hasn't been set yet, so the `$n` bytes that `swap_chunk`
1174+ // will read and write are within the usable range.
1175+ unsafe { swap_chunk:: <$n>( & mut * x. add( i) . cast( ) , & mut * y. add( i) . cast( ) ) } ;
1176+ i |= $n;
1177+ }
1178+ ) +} ;
1179+ }
1180+ swap_prefix ! ( 4 2 1 ) ;
1181+ debug_assert_eq ! ( i, bytes) ;
1182+ }
1183+
1184+ const CHUNK_SIZE : usize = size_of :: < * const ( ) > ( ) ;
1185+ let bytes = bytes. get ( ) ;
1186+
1187+ let chunks = bytes / CHUNK_SIZE ;
1188+ let tail = bytes % CHUNK_SIZE ;
1189+ if let Some ( chunks) = NonZero :: new ( chunks) {
1190+ // SAFETY: this is bytes/CHUNK_SIZE*CHUNK_SIZE bytes, which is <= bytes,
1191+ // so it's within the range of our non-overlapping bytes.
1192+ unsafe { swap_nonoverlapping_chunks :: < CHUNK_SIZE > ( x. cast ( ) , y. cast ( ) , chunks) } ;
1193+ }
1194+ if let Some ( tail) = NonZero :: new ( tail) {
1195+ const { assert ! ( CHUNK_SIZE <= 8 ) } ;
1196+ let delta = chunks * CHUNK_SIZE ;
1197+ // SAFETY: the tail length is below CHUNK SIZE because of the remainder,
1198+ // and CHUNK_SIZE is at most 8 by the const assert, so tail <= 7
1199+ unsafe { swap_nonoverlapping_short ( x. add ( delta) , y. add ( delta) , tail) } ;
1200+ }
1201+ }
1202+
11701203/// Moves `src` into the pointed `dst`, returning the previous `dst` value.
11711204///
11721205/// Neither value is dropped.
0 commit comments