@@ -1209,21 +1209,28 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                     throw_ub_custom!(fluent::const_eval_copy_nonoverlapping_overlapping);
                 }
             }
+        }
 
-            for i in 0..num_copies {
-                ptr::copy(
-                    src_bytes,
-                    dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication
-                    size.bytes_usize(),
-                );
+        let size_in_bytes = size.bytes_usize();
+        // For particularly large arrays (where this is perf-sensitive) it's common that
+        // we're writing a single byte repeatedly. So, optimize that case to a memset.
+        if size_in_bytes == 1 && num_copies >= 1 {
+            // SAFETY: `src_bytes` would be read from anyway by the copies below (num_copies >= 1).
+            // Since `size_in_bytes == 1`, the `init.no_bytes_init()` check above guarantees
+            // that this read at type `u8` is OK -- it must be an initialized byte.
+            let value = *src_bytes;
+            dest_bytes.write_bytes(value, (size * num_copies).bytes_usize());
+        } else if src_alloc_id == dest_alloc_id {
+            let mut dest_ptr = dest_bytes;
+            for _ in 0..num_copies {
+                ptr::copy(src_bytes, dest_ptr, size_in_bytes);
+                dest_ptr = dest_ptr.add(size_in_bytes);
             }
         } else {
-            for i in 0..num_copies {
-                ptr::copy_nonoverlapping(
-                    src_bytes,
-                    dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication
-                    size.bytes_usize(),
-                );
+            let mut dest_ptr = dest_bytes;
+            for _ in 0..num_copies {
+                ptr::copy_nonoverlapping(src_bytes, dest_ptr, size_in_bytes);
+                dest_ptr = dest_ptr.add(size_in_bytes);
             }
         }
     }
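
For context, the diff collapses `num_copies` single-byte `ptr::copy` calls into one `write_bytes` (a memset), which is the whole win for large byte-array initialization. Below is a minimal standalone sketch of that idea, not the interpreter's actual code: `repeat_copy` is a made-up helper, the interpreter's `Size`/`AllocId`/provenance machinery is elided, and the same-allocation `ptr::copy` branch from the diff is omitted for brevity.

```rust
use std::ptr;

/// Hypothetical helper (not the compiler's API): copy `size` bytes starting at
/// `src` into `dest`, `num_copies` times back to back. `dest` must be valid for
/// `size * num_copies` bytes of writes and must not overlap `src`.
unsafe fn repeat_copy(src: *const u8, dest: *mut u8, size: usize, num_copies: usize) {
    if size == 1 && num_copies >= 1 {
        // Fast path: repeating a single byte is just a memset.
        unsafe { dest.write_bytes(*src, num_copies) };
    } else {
        // General path: one copy per repetition, advancing the destination.
        let mut dest_ptr = dest;
        for _ in 0..num_copies {
            unsafe {
                ptr::copy_nonoverlapping(src, dest_ptr, size);
                dest_ptr = dest_ptr.add(size);
            }
        }
    }
}

fn main() {
    // Single-byte element: hits the memset fast path.
    let src = [0xABu8];
    let mut dest = vec![0u8; 8];
    unsafe { repeat_copy(src.as_ptr(), dest.as_mut_ptr(), 1, dest.len()) };
    assert_eq!(dest, [0xAB; 8]);

    // Multi-byte element: falls back to the copy loop.
    let pattern = [1u8, 2, 3];
    let mut tiled = vec![0u8; 9];
    unsafe { repeat_copy(pattern.as_ptr(), tiled.as_mut_ptr(), pattern.len(), 3) };
    assert_eq!(tiled, [1, 2, 3, 1, 2, 3, 1, 2, 3]);
}
```

Note that the `size == 1` special case is only sound here because a one-byte source cannot partially overlap the destination in a way a memset would misread; in the real diff, the SAFETY comment additionally leans on the interpreter's earlier `init.no_bytes_init()` check to justify reading the byte as initialized.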