@@ -125,12 +125,11 @@ macro_rules! unsigned_fn {
125125 #[ must_use = "this returns the result of the operation, \
126126 without modifying the original"]
127127 pub const fn $UnsignedT( mut n: $UnsignedT) -> $UnsignedT {
128- let leading_zeros = n. leading_zeros( ) ;
129- if leading_zeros >= <$HalfBitsT>:: BITS {
128+ if n <= <$HalfBitsT>:: MAX as $UnsignedT {
130129 $HalfBitsT( n as $HalfBitsT) as $UnsignedT
131130 } else {
132- const EVEN_BITMASK : u32 = u32 :: MAX & !1 ;
133- let normalization_shift = leading_zeros & EVEN_BITMASK ;
131+ const EVEN_MAKING_BITMASK : u32 = !1 ;
132+ let normalization_shift = n . leading_zeros( ) & EVEN_MAKING_BITMASK ;
134133 n <<= normalization_shift;
135134
136135 let s = $stages!( n) ;
@@ -155,6 +154,28 @@ macro_rules! first_stage {
155154/// Generates a middle stage of the computation.
156155macro_rules! middle_stage {
157156 ( $original_bits: literal, $ty: ty, $n: ident, $s: ident, $r: ident) => { {
157+ // SAFETY: Inform the optimizer that `$s` is nonzero. This will allow
158+ // it to avoid generating code to handle division-by-zero panics in the
159+ // divisions below.
160+ //
161+ // If the original `$n` is zero, the top of the `unsigned_fn` macro
162+ // recurses instead of continuing to this point, so the original `$n`
163+ // wasn't a 0 if we've reached here.
164+ //
165+ // Then the `unsigned_fn` macro normalizes `$n` so that at least one of
166+ // the two most-significant bits is a 1.
167+ //
168+ // Then these stages take as many of the most-significant bits of `$n`
169+ // as will fit in this stage's type. For example, the stage that handles
170+ // `u32` deals with the 32 most-significant bits of `$n`. This means
171+ // that each stage has at least one 1 bit in `n`'s two most-significant
172+ // bits, making `n` nonzero.
173+ //
174+ // Then, the stage previous to this produces `$s` as the correct
175+ // integer square root for the previous type. Since it was taking the
176+ // integer square root of a nonzero number, `$s` will be nonzero.
177+ unsafe { crate :: hint:: assert_unchecked( $s != 0 ) } ;
178+
158179 const N_SHIFT : u32 = $original_bits - <$ty>:: BITS ;
159180 let n = ( $n >> N_SHIFT ) as $ty;
160181
@@ -168,6 +189,7 @@ macro_rules! middle_stage {
168189 let denominator = ( $s as $ty) << 1 ;
169190 let q = numerator / denominator;
170191 let u = numerator % denominator;
192+
171193 let mut s = ( $s << QUARTER_BITS ) as $ty + q;
172194 let ( mut r, overflow) =
173195 ( ( u << QUARTER_BITS ) | ( lo & LOWEST_QUARTER_1_BITS ) ) . overflowing_sub( q * q) ;
@@ -182,13 +204,21 @@ macro_rules! middle_stage {
182204/// Generates the last stage of the computation before denormalization.
183205macro_rules! last_stage {
184206 ( $ty: ty, $n: ident, $s: ident, $r: ident) => { {
207+ // SAFETY: Inform the optimizer that `$s` is nonzero. This will allow
208+ // it to avoid generating code to handle division-by-zero panics in the
209+ // divisions below.
210+ //
211+ // See the proof in the `middle_stage` macro above.
212+ unsafe { core:: hint:: assert_unchecked( $s != 0 ) } ;
213+
185214 const HALF_BITS : u32 = <$ty>:: BITS >> 1 ;
186215 const QUARTER_BITS : u32 = <$ty>:: BITS >> 2 ;
187216 const LOWER_HALF_1_BITS : $ty = ( 1 << HALF_BITS ) - 1 ;
188217
189218 let lo = $n & LOWER_HALF_1_BITS ;
190219 let numerator = ( ( $r as $ty) << QUARTER_BITS ) | ( lo >> QUARTER_BITS ) ;
191220 let denominator = ( $s as $ty) << 1 ;
221+
192222 let q = numerator / denominator;
193223 let mut s = ( $s << QUARTER_BITS ) as $ty + q;
194224 let ( s_squared, overflow) = s. overflowing_mul( s) ;
0 commit comments