@@ -398,6 +398,7 @@ use crate::cmp::Ordering;
398398use crate :: intrinsics:: const_eval_select;
399399use crate :: marker:: FnPtr ;
400400use crate :: mem:: { self , MaybeUninit , SizedTypeProperties } ;
401+ use crate :: num:: NonZero ;
401402use crate :: { fmt, hash, intrinsics, ub_checks} ;
402403
403404mod alignment;
@@ -1094,49 +1095,26 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
10941095 // are pointers inside `T` we will copy them in one go rather than trying to copy a part
10951096 // of a pointer (which would not work).
10961097 // SAFETY: Same preconditions as this function
1097- unsafe { swap_nonoverlapping_simple_untyped ( x, y, count) }
1098+ unsafe { swap_nonoverlapping_const ( x, y, count) }
10981099 } else {
1099- macro_rules! attempt_swap_as_chunks {
1100- ( $ChunkTy : ty) => {
1101- if mem:: align_of:: <T >( ) >= mem:: align_of:: <$ChunkTy >( )
1102- && mem:: size_of:: <T >( ) % mem:: size_of:: <$ChunkTy >( ) == 0
1103- {
1104- let x: * mut $ChunkTy = x. cast( ) ;
1105- let y: * mut $ChunkTy = y. cast( ) ;
1106- let count = count * ( mem:: size_of:: <T >( ) / mem:: size_of:: <$ChunkTy >( ) ) ;
1107- // SAFETY: these are the same bytes that the caller promised were
1108- // ok, just typed as `MaybeUninit<ChunkTy>`s instead of as `T`s.
1109- // The `if` condition above ensures that we're not violating
1110- // alignment requirements, and that the division is exact so
1111- // that we don't lose any bytes off the end.
1112- return unsafe { swap_nonoverlapping_simple_untyped( x, y, count) } ;
1113- }
1114- } ;
1100+ // Going through a slice here helps codegen know the size fits in `isize`
1101+ let slice = slice_from_raw_parts_mut( x, count) ;
1102+ // SAFETY: We have two non-overlapping ranges in memory and they're both
1103+ // readable so don't include the null address, thus they're at most
1104+ // `(usize::MAX - 1)/2 = isize::MAX` bytes long.
1105+ let bytes = unsafe { mem:: size_of_val_raw:: <[ T ] >( slice) } ;
1106+ if let Some ( bytes) = NonZero :: new( bytes) {
1107+ // SAFETY: These are the same ranges, just expressed in a different
1108+ // type, so they're still non-overlapping.
1109+ unsafe { swap_nonoverlapping_bytes( x. cast( ) , y. cast( ) , bytes) } ;
11151110 }
1116-
1117- // Split up the slice into small power-of-two-sized chunks that LLVM is able
1118- // to vectorize (unless it's a special type with more-than-pointer alignment,
1119- // because we don't want to pessimize things like slices of SIMD vectors.)
1120- if mem:: align_of:: <T >( ) <= mem:: size_of:: <usize >( )
1121- && ( !mem:: size_of:: <T >( ) . is_power_of_two( )
1122- || mem:: size_of:: <T >( ) > mem:: size_of:: <usize >( ) * 2 )
1123- {
1124- attempt_swap_as_chunks!( usize ) ;
1125- attempt_swap_as_chunks!( u8 ) ;
1126- }
1127-
1128- // SAFETY: Same preconditions as this function
1129- unsafe { swap_nonoverlapping_simple_untyped( x, y, count) }
11301111 }
11311112 )
11321113}
11331114
11341115/// Same behavior and safety conditions as [`swap_nonoverlapping`]
1135- ///
1136- /// LLVM can vectorize this (at least it can for the power-of-two-sized types
1137- /// `swap_nonoverlapping` tries to use) so no need to manually SIMD it.
11381116#[ inline]
1139- const unsafe fn swap_nonoverlapping_simple_untyped < T > ( x : * mut T , y : * mut T , count : usize ) {
1117+ const unsafe fn swap_nonoverlapping_const < T > ( x : * mut T , y : * mut T , count : usize ) {
11401118 let x = x. cast :: < MaybeUninit < T > > ( ) ;
11411119 let y = y. cast :: < MaybeUninit < T > > ( ) ;
11421120 let mut i = 0 ;
@@ -1147,13 +1125,6 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
11471125 // and it's distinct from `x` since the ranges are non-overlapping
11481126 let y = unsafe { y. add ( i) } ;
11491127
1150- // If we end up here, it's because we're using a simple type -- like
1151- // a small power-of-two-sized thing -- or a special type with particularly
1152- // large alignment, particularly SIMD types.
1153- // Thus, we're fine just reading-and-writing it, as either it's small
1154- // and that works well anyway or it's special and the type's author
1155- // presumably wanted things to be done in the larger chunk.
1156-
11571128 // SAFETY: we're only ever given pointers that are valid to read/write,
11581129 // including being aligned, and nothing here panics so it's drop-safe.
11591130 unsafe {
@@ -1167,6 +1138,72 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
11671138 }
11681139}
11691140
1141+ // Don't let MIR inline this, because we really want it to keep its noalias metadata
      // (presumably the metadata that comes from taking both chunks as `&mut`
      // references -- NOTE(review): confirm against the codegen tests).
      //
      // Swaps the contents of two `N`-byte chunks. Both values are copied out
      // first, and only afterwards is each one stored into the other's place.
1142+ #[ rustc_no_mir_inline]
1143+ #[ inline]
1144+ fn swap_chunk < const N : usize > ( x : & mut MaybeUninit < [ u8 ; N ] > , y : & mut MaybeUninit < [ u8 ; N ] > ) {
      // Copy both chunks out before writing anything back.
1145+ let a = * x;
1146+ let b = * y;
      // Store each saved chunk into the opposite location.
1147+ * x = b;
1148+ * y = a;
1149+ }
1150+
1151+ #[ inline]
      // Swaps `bytes` bytes between the range starting at `x` and the range
      // starting at `y`.
      //
      // The bulk is swapped in `CHUNK_SIZE`-byte (pointer-sized) chunks; whatever
      // remains (fewer than `CHUNK_SIZE` bytes) is handled by
      // `swap_nonoverlapping_short`.
      //
      // Safety (as relied on by the `SAFETY` comments below): both ranges must be
      // usable for reads and writes of `bytes` bytes and must not overlap.
1152+ unsafe fn swap_nonoverlapping_bytes ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1153+ // Same as `swap_nonoverlapping::<[u8; N]>`.
      // Swaps `chunks` consecutive `N`-byte chunks, one `swap_chunk` call per index.
1154+ #[ inline]
1155+ unsafe fn swap_nonoverlapping_chunks < const N : usize > (
1156+ x : * mut MaybeUninit < [ u8 ; N ] > ,
1157+ y : * mut MaybeUninit < [ u8 ; N ] > ,
1158+ chunks : NonZero < usize > ,
1159+ ) {
1160+ let chunks = chunks. get ( ) ;
1161+ for i in 0 ..chunks {
1162+ // SAFETY: i is in [0, chunks) so the adds and dereferences are in-bounds.
1163+ unsafe { swap_chunk ( & mut * x. add ( i) , & mut * y. add ( i) ) } ;
1164+ }
1165+ }
1166+
1167+ // Same as `swap_nonoverlapping_bytes`, but accepts at most 1+2+4=7 bytes
      // Each of the bits 4, 2 and 1 that is set in `bytes` triggers one swap of
      // that many bytes. `i` accumulates the offset of the bytes already swapped,
      // so it is expected to equal `bytes` at the end (see the debug assertion).
1168+ #[ inline]
1169+ unsafe fn swap_nonoverlapping_short ( x : * mut u8 , y : * mut u8 , bytes : NonZero < usize > ) {
1170+ let bytes = bytes. get ( ) ;
1171+ let mut i = 0 ;
      // Expands to one `if` per listed size, in the order the sizes are given.
1172+ macro_rules! swap_prefix {
1173+ ( $( $n: literal) +) => { $(
1174+ if ( bytes & $n) != 0 {
1175+ // SAFETY: `i` can only have the same bits set as those in bytes,
1176+ // so these `add`s are in-bounds of `bytes`. But the bit for
1177+ // `$n` hasn't been set yet, so the `$n` bytes that `swap_chunk`
1178+ // will read and write are within the usable range.
1179+ unsafe { swap_chunk:: <$n>( & mut * x. add( i) . cast( ) , & mut * y. add( i) . cast( ) ) } ;
      // Record that these `$n` bytes are done; this also advances the offset.
1180+ i |= $n;
1181+ }
1182+ ) +} ;
1183+ }
1184+ swap_prefix ! ( 4 2 1 ) ;
      // If `bytes` had any bit above 4 set, `i` would differ from `bytes` here.
1185+ debug_assert_eq ! ( i, bytes) ;
1186+ }
1187+
      // Chunk size used for the bulk of the swap: the size of a thin pointer.
1188+ const CHUNK_SIZE : usize = size_of :: < * const ( ) > ( ) ;
1189+ let bytes = bytes. get ( ) ;
1190+
      // Split the length into whole chunks plus a remainder shorter than one chunk.
1191+ let chunks = bytes / CHUNK_SIZE ;
1192+ let tail = bytes % CHUNK_SIZE ;
1193+ if let Some ( chunks) = NonZero :: new ( chunks) {
1194+ // SAFETY: this is bytes/CHUNK_SIZE*CHUNK_SIZE bytes, which is <= bytes,
1195+ // so it's within the range of our non-overlapping bytes.
1196+ unsafe { swap_nonoverlapping_chunks :: < CHUNK_SIZE > ( x. cast ( ) , y. cast ( ) , chunks) } ;
1197+ }
1198+ if let Some ( tail) = NonZero :: new ( tail) {
      // Compile-time check that a tail can never exceed the 7 bytes that
      // `swap_nonoverlapping_short` supports.
1199+ const { assert ! ( CHUNK_SIZE <= 8 ) } ;
      // Byte offset of the first byte not covered by the whole chunks above
      // (`chunks` here is the plain `usize` count, not the `NonZero` binding).
1200+ let delta = chunks * CHUNK_SIZE ;
1201+ // SAFETY: the tail length is below `CHUNK_SIZE` because of the remainder,
1202+ // and CHUNK_SIZE is at most 8 by the const assert, so tail <= 7
1203+ unsafe { swap_nonoverlapping_short ( x. add ( delta) , y. add ( delta) , tail) } ;
1204+ }
1205+ }
1206+
11701207/// Moves `src` into the pointed `dst`, returning the previous `dst` value.
11711208///
11721209/// Neither value is dropped.
0 commit comments