Skip to content

Commit c8f22ca

Browse files
committed
Auto merge of #148737 - zachs18:unit-is-zero, r=joboet
Implement IsZero for (), and optimize `IsZero::is_zero` for arrays These are probably not super useful optimizations, but they make it so that `vec![expr; LARGE_LENGTH]` has better performance for some `expr`s, e.g. * array of length zero in debug mode * tuple containing `()` and zero-valued integers in debug and release mode * array of `()` or other zero-sized `IsZero` type in debug mode <details> <summary>very rough benchmarks</summary> ```Rust use std::time::Instant; use std::sync::atomic::{AtomicUsize, Ordering::Relaxed}; struct NonCopyZst; static COUNTER: AtomicUsize = AtomicUsize::new(0); impl Clone for NonCopyZst { fn clone(&self) -> Self { COUNTER.fetch_add(1, Relaxed); Self } } macro_rules! timeit { ($e:expr) => { let start = Instant::now(); _ = $e; println!("{:56}: {:?}", stringify!($e), start.elapsed()); }; } fn main() { timeit!(vec![[String::from("hello"); 0]; 1_000_000_000]); // gets a lot better in debug mode timeit!(vec![(0u8, (), 0u16); 1_000_000_000]); // gets a lot better in debug *and* release mode timeit!(vec![[[(); 37]; 1_000_000_000]; 1_000_000_000]); // gets a lot better in debug mode timeit!(vec![[NonCopyZst; 0]; 1_000_000_000]); // gets a lot better in debug mode timeit!(vec![[[1u8; 0]; 1_000_000]; 1_000_000]); // gets a little bit better in debug mode timeit!(vec![[[(); 37]; 1_000_000]; 1_000_000]); // gets a little bit better in debug mode timeit!(vec![[[1u128; 0]; 1_000_000]; 1_000_000]); // gets a little bit better in debug mode // check that we don't regress existing optimizations timeit!(vec![(0u8, 0u16); 1_000_000_000]); // about the same time timeit!(vec![0u32; 1_000_000_000]); // about the same time // check that we still call clone for non-IsZero ZSTs timeit!(vec![[const { NonCopyZst }; 2]; 1_000]); // about the same time assert_eq!(COUNTER.load(Relaxed), 1998); timeit!(vec![NonCopyZst; 10_000]); // about the same time assert_eq!(COUNTER.load(Relaxed), 1998 + 9_999); } ``` ```rs $ cargo +nightly run // ... 
vec![[String::from("hello"); 0]; 1_000_000_000] : 11.13999724s vec![(0u8, (), 0u16); 1_000_000_000] : 5.254646651s vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 2.738062531s vec![[NonCopyZst; 0]; 1_000_000_000] : 9.483690922s vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 2.919236ms vec![[[(); 37]; 1_000_000]; 1_000_000] : 2.927755ms vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 2.931486ms vec![(0u8, 0u16); 1_000_000_000] : 19.46µs vec![0u32; 1_000_000_000] : 9.34µs vec![[const { NonCopyZst }; 2]; 1_000] : 31.88µs vec![NonCopyZst; 10_000] : 36.519µs ``` ```rs $ cargo +dev run // ... vec![[String::from("hello"); 0]; 1_000_000_000] : 4.12µs vec![(0u8, (), 0u16); 1_000_000_000] : 16.299µs vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 210ns vec![[NonCopyZst; 0]; 1_000_000_000] : 210ns vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 170ns vec![[[(); 37]; 1_000_000]; 1_000_000] : 110ns vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 140ns vec![(0u8, 0u16); 1_000_000_000] : 11.56µs vec![0u32; 1_000_000_000] : 10.71µs vec![[const { NonCopyZst }; 2]; 1_000] : 36.08µs vec![NonCopyZst; 10_000] : 73.21µs ``` (checking release mode to make sure this doesn't regress perf there) ```rs $ cargo +nightly run --release // ... 
vec![[String::from("hello"); 0]; 1_000_000_000] : 70ns vec![(0u8, (), 0u16); 1_000_000_000] : 1.269457501s vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 10ns vec![[NonCopyZst; 0]; 1_000_000_000] : 20ns vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 10ns vec![[[(); 37]; 1_000_000]; 1_000_000] : 20ns vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 20ns vec![(0u8, 0u16); 1_000_000_000] : 20ns vec![0u32; 1_000_000_000] : 20ns vec![[const { NonCopyZst }; 2]; 1_000] : 2.66µs vec![NonCopyZst; 10_000] : 13.39µs ``` ```rs $ cargo +dev run --release vec![[String::from("hello"); 0]; 1_000_000_000] : 90ns vec![(0u8, (), 0u16); 1_000_000_000] : 30ns vec![[[(); 37]; 1_000_000_000]; 1_000_000_000] : 20ns vec![[NonCopyZst; 0]; 1_000_000_000] : 30ns vec![[[1u8; 0]; 1_000_000]; 1_000_000] : 20ns vec![[[(); 37]; 1_000_000]; 1_000_000] : 20ns vec![[[1u128; 0]; 1_000_000]; 1_000_000] : 20ns vec![(0u8, 0u16); 1_000_000_000] : 30ns vec![0u32; 1_000_000_000] : 20ns vec![[const { NonCopyZst }; 2]; 1_000] : 3.52µs vec![NonCopyZst; 10_000] : 17.13µs ``` </details> The specific expression where I ran into the perf issue that this PR addresses is `vec![[(); LARGE]; LARGE]`: I was trying to demonstrate `Vec::into_flattened` panicking on length overflow in the playground, but got a timeout error instead, since `vec![[(); LARGE]; LARGE]` took so long to run in debug mode (it runs fine on the playground in release mode).
2 parents 29a6971 + 0aaa3ae commit c8f22ca

File tree

1 file changed

+33
-9
lines changed

1 file changed

+33
-9
lines changed

library/alloc/src/vec/is_zero.rs

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use core::mem::SizedTypeProperties;
12
use core::num::{NonZero, Saturating, Wrapping};
23

34
use crate::boxed::Box;
@@ -20,6 +21,8 @@ macro_rules! impl_is_zero {
2021
};
2122
}
2223

24+
impl_is_zero!((), |_: ()| true); // It is needed to impl for arrays and tuples of ().
25+
2326
impl_is_zero!(i8, |x| x == 0); // It is needed to impl for arrays and tuples of i8.
2427
impl_is_zero!(i16, |x| x == 0);
2528
impl_is_zero!(i32, |x| x == 0);
@@ -43,25 +46,46 @@ impl_is_zero!(f64, |x: f64| x.to_bits() == 0);
4346
// `IsZero` cannot be soundly implemented for pointers because of provenance
4447
// (see #135338).
4548

49+
unsafe impl<T, const N: usize> IsZero for [T; N] {
50+
#[inline]
51+
default fn is_zero(&self) -> bool {
52+
// If the array is of length zero,
53+
// then it doesn't actually contain any `T`s,
54+
// so `T::clone` doesn't need to be called,
55+
// and we can "zero-initialize" all zero bytes of the array.
56+
N == 0
57+
}
58+
}
59+
4660
unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] {
4761
#[inline]
4862
fn is_zero(&self) -> bool {
49-
// Because this is generated as a runtime check, it's not obvious that
50-
// it's worth doing if the array is really long. The threshold here
51-
// is largely arbitrary, but was picked because as of 2022-07-01 LLVM
52-
// fails to const-fold the check in `vec![[1; 32]; n]`
53-
// See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
54-
// Feel free to tweak if you have better evidence.
55-
56-
N <= 16 && self.iter().all(IsZero::is_zero)
63+
if T::IS_ZST {
64+
// If T is a ZST, then there is at most one possible value of `T`,
65+
// so we only need to check one element for zeroness.
66+
// We can't unconditionally return `true` here, since, e.g.
67+
// `T = [NonTrivialCloneZst; 5]` is a ZST that implements `IsZero`
68+
// due to the generic array impl, but `T::is_zero` returns `false`
69+
// since the length is not 0.
70+
self.get(0).is_none_or(IsZero::is_zero)
71+
} else {
72+
// Because this is generated as a runtime check, it's not obvious that
73+
// it's worth doing if the array is really long. The threshold here
74+
// is largely arbitrary, but was picked because as of 2022-07-01 LLVM
75+
// fails to const-fold the check in `vec![[1; 32]; n]`
76+
// See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
77+
// Feel free to tweak if you have better evidence.
78+
79+
N <= 16 && self.iter().all(IsZero::is_zero)
80+
}
5781
}
5882
}
5983

6084
// This is a recursive macro.
6185
macro_rules! impl_is_zero_tuples {
6286
// Stopper
6387
() => {
64-
// No use for implementing for empty tuple because it is ZST.
88+
// We already have an impl for () above.
6589
};
6690
($first_arg:ident $(,$rest:ident)*) => {
6791
unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){

0 commit comments

Comments
 (0)