@@ -398,6 +398,7 @@ use crate::cmp::Ordering;
398398use crate :: intrinsics:: const_eval_select;
399399use crate :: marker:: FnPtr ;
400400use crate :: mem:: { self , MaybeUninit , SizedTypeProperties } ;
401+ use crate :: num:: NonZero ;
401402use crate :: { fmt, hash, intrinsics, ub_checks} ;
402403
403404mod alignment;
@@ -1094,49 +1095,26 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
10941095 // are pointers inside `T` we will copy them in one go rather than trying to copy a part
10951096 // of a pointer (which would not work).
10961097 // SAFETY: Same preconditions as this function
1097- unsafe { swap_nonoverlapping_simple_untyped ( x, y, count) }
1098+ unsafe { swap_nonoverlapping_const ( x, y, count) }
10981099 } else {
1099- macro_rules! attempt_swap_as_chunks {
1100- ( $ChunkTy : ty) => {
1101- if mem:: align_of:: <T >( ) >= mem:: align_of:: <$ChunkTy >( )
1102- && mem:: size_of:: <T >( ) % mem:: size_of:: <$ChunkTy >( ) == 0
1103- {
1104- let x: * mut $ChunkTy = x. cast( ) ;
1105- let y: * mut $ChunkTy = y. cast( ) ;
1106- let count = count * ( mem:: size_of:: <T >( ) / mem:: size_of:: <$ChunkTy >( ) ) ;
1107- // SAFETY: these are the same bytes that the caller promised were
1108- // ok, just typed as `MaybeUninit<ChunkTy>`s instead of as `T`s.
1109- // The `if` condition above ensures that we're not violating
1110- // alignment requirements, and that the division is exact so
1111- // that we don't lose any bytes off the end.
1112- return unsafe { swap_nonoverlapping_simple_untyped( x, y, count) } ;
1113- }
1114- } ;
1100+ // Going through a slice here helps codegen know the size fits in `isize`
1101+ let slice = slice_from_raw_parts_mut( x, count) ;
1102+ // SAFETY: We have two non-overlapping ranges in memory and they're both
1103+ // readable so don't include the null address, thus they're at most
1104+ // `(usize::MAX - 1)/2 = isize::MAX` bytes long.
1105+ let bytes = unsafe { mem:: size_of_val_raw:: <[ T ] >( slice) } ;
1106+ if let Some ( bytes) = NonZero :: new( bytes) {
1107+ // SAFETY: These are the same ranges, just expressed in a different
1108+ // type, so they're still non-overlapping.
1109+ unsafe { swap_nonoverlapping_bytes( x. cast( ) , y. cast( ) , bytes) } ;
11151110 }
1116-
1117- // Split up the slice into small power-of-two-sized chunks that LLVM is able
1118- // to vectorize (unless it's a special type with more-than-pointer alignment,
1119- // because we don't want to pessimize things like slices of SIMD vectors.)
1120- if mem:: align_of:: <T >( ) <= mem:: size_of:: <usize >( )
1121- && ( !mem:: size_of:: <T >( ) . is_power_of_two( )
1122- || mem:: size_of:: <T >( ) > mem:: size_of:: <usize >( ) * 2 )
1123- {
1124- attempt_swap_as_chunks!( usize ) ;
1125- attempt_swap_as_chunks!( u8 ) ;
1126- }
1127-
1128- // SAFETY: Same preconditions as this function
1129- unsafe { swap_nonoverlapping_simple_untyped( x, y, count) }
11301111 }
11311112 )
11321113}
11331114
11341115/// Same behavior and safety conditions as [`swap_nonoverlapping`]
1135- ///
1136- /// LLVM can vectorize this (at least it can for the power-of-two-sized types
1137- /// `swap_nonoverlapping` tries to use) so no need to manually SIMD it.
11381116#[ inline]
1139- const unsafe fn swap_nonoverlapping_simple_untyped < T > ( x : * mut T , y : * mut T , count : usize ) {
1117+ const unsafe fn swap_nonoverlapping_const < T > ( x : * mut T , y : * mut T , count : usize ) {
11401118 let x = x. cast :: < MaybeUninit < T > > ( ) ;
11411119 let y = y. cast :: < MaybeUninit < T > > ( ) ;
11421120 let mut i = 0 ;
@@ -1147,13 +1125,6 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
11471125 // and it's distinct from `x` since the ranges are non-overlapping
11481126 let y = unsafe { y. add ( i) } ;
11491127
1150- // If we end up here, it's because we're using a simple type -- like
1151- // a small power-of-two-sized thing -- or a special type with particularly
1152- // large alignment, particularly SIMD types.
1153- // Thus, we're fine just reading-and-writing it, as either it's small
1154- // and that works well anyway or it's special and the type's author
1155- // presumably wanted things to be done in the larger chunk.
1156-
11571128 // SAFETY: we're only ever given pointers that are valid to read/write,
11581129 // including being aligned, and nothing here panics so it's drop-safe.
11591130 unsafe {
@@ -1167,6 +1138,76 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
11671138 }
11681139}
11691140
1141+ // Don't let MIR inline this, because we really want it to keep its noalias metadata
1142+ #[ rustc_no_mir_inline]
1143+ #[ inline]
1144+ fn swap_chunk < const N : usize > ( x : & mut MaybeUninit < [ u8 ; N ] > , y : & mut MaybeUninit < [ u8 ; N ] > ) {
1145+ let a = * x;
1146+ let b = * y;
1147+ * x = b;
1148+ * y = a;
1149+ }
1150+
1151+ #[ inline]
1152+ unsafe fn swap_nonoverlapping_bytes ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1153+ // Same as `swap_nonoverlapping::<[u8; N]>`.
1154+ unsafe fn swap_nonoverlapping_chunks < const N : usize > (
1155+ x : * mut MaybeUninit < [ u8 ; N ] > ,
1156+ y : * mut MaybeUninit < [ u8 ; N ] > ,
1157+ chunks : NonZero < usize > ,
1158+ ) {
1159+ let chunks = chunks. get ( ) ;
1160+ for i in 0 ..chunks {
1161+ // SAFETY: i is in [0, chunks) so the adds and dereferences are in-bounds.
1162+ unsafe { swap_chunk ( & mut * x. add ( i) , & mut * y. add ( i) ) } ;
1163+ }
1164+ }
1165+
1166+ // Same as `swap_nonoverlapping_bytes`, but accepts at most 1+2+4=7 bytes
1167+ #[ inline]
1168+ unsafe fn swap_nonoverlapping_short ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1169+ // Tail handling for auto-vectorized code sometimes has element-at-a-time behaviour,
1170+ // see <https://github.com/rust-lang/rust/issues/134946>.
1171+ // By swapping as different sizes, rather than as a loop over bytes,
1172+ // we make sure not to end up with, say, seven byte-at-a-time copies.
1173+
1174+ let bytes = bytes. get ( ) ;
1175+ let mut i = 0 ;
1176+ macro_rules! swap_prefix {
1177+ ( $( $n: literal) +) => { $(
1178+ if ( bytes & $n) != 0 {
1179+ // SAFETY: `i` can only have the same bits set as those in bytes,
1180+ // so these `add`s are in-bounds of `bytes`. But the bit for
1181+ // `$n` hasn't been set yet, so the `$n` bytes that `swap_chunk`
1182+ // will read and write are within the usable range.
1183+ unsafe { swap_chunk:: <$n>( & mut * x. add( i) . cast( ) , & mut * y. add( i) . cast( ) ) } ;
1184+ i |= $n;
1185+ }
1186+ ) +} ;
1187+ }
1188+ swap_prefix ! ( 4 2 1 ) ;
1189+ debug_assert_eq ! ( i, bytes) ;
1190+ }
1191+
1192+ const CHUNK_SIZE : usize = size_of :: < * const ( ) > ( ) ;
1193+ let bytes = bytes. get ( ) ;
1194+
1195+ let chunks = bytes / CHUNK_SIZE ;
1196+ let tail = bytes % CHUNK_SIZE ;
1197+ if let Some ( chunks) = NonZero :: new ( chunks) {
1198+ // SAFETY: this is bytes/CHUNK_SIZE*CHUNK_SIZE bytes, which is <= bytes,
1199+ // so it's within the range of our non-overlapping bytes.
1200+ unsafe { swap_nonoverlapping_chunks :: < CHUNK_SIZE > ( x. cast ( ) , y. cast ( ) , chunks) } ;
1201+ }
1202+ if let Some ( tail) = NonZero :: new ( tail) {
1203+ const { assert ! ( CHUNK_SIZE <= 8 ) } ;
1204+ let delta = chunks * CHUNK_SIZE ;
1205+ // SAFETY: the tail length is below CHUNK SIZE because of the remainder,
1206+ // and CHUNK_SIZE is at most 8 by the const assert, so tail <= 7
1207+ unsafe { swap_nonoverlapping_short ( x. add ( delta) , y. add ( delta) , tail) } ;
1208+ }
1209+ }
1210+
11701211/// Moves `src` into the pointed `dst`, returning the previous `dst` value.
11711212///
11721213/// Neither value is dropped.
0 commit comments