@@ -200,6 +200,7 @@ impl FloatDivision for f128 {
200200}
201201
202202extern crate std;
203+ use core:: ops;
203204#[ allow( unused) ]
204205use std:: { dbg, fmt, println} ;
205206
@@ -217,6 +218,7 @@ where
217218 F :: Int : CastInto < u64 > ,
218219 F :: Int : CastInto < i64 > ,
219220 F :: Int : HInt + DInt ,
221+ <F :: Int as HInt >:: D : ops:: Shr < u32 , Output = <F :: Int as HInt >:: D > ,
220222 F :: SignedInt : CastFrom < u32 > ,
221223 F :: SignedInt : CastInto < u32 > ,
222224 F :: SignedInt : CastFrom < F :: Int > ,
@@ -237,6 +239,8 @@ where
237239{
238240 let one = F :: Int :: ONE ;
239241 let zero = F :: Int :: ZERO ;
242+ let one_hw = HalfRep :: < F > :: ONE ;
243+ let zero_hw = HalfRep :: < F > :: ZERO ;
240244 let hw = F :: BITS / 2 ;
241245 let lo_mask = F :: Int :: MAX >> hw;
242246
@@ -399,9 +403,7 @@ where
399403 let c_hw = F :: C_HW ;
400404
401405 // Check that the top bit is set, i.e. value is within `[1, 2)`.
402- debug_assert ! (
403- b_uq1_hw & HalfRep :: <F >:: ONE << ( HalfRep :: <F >:: BITS - 1 ) > HalfRep :: <F >:: ZERO
404- ) ;
406+ debug_assert ! ( b_uq1_hw & one_hw << ( HalfRep :: <F >:: BITS - 1 ) > zero_hw) ;
405407
406408 // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
407409 // so x0 fits to UQ0.HW without wrapping.
@@ -437,8 +439,7 @@ where
437439 // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
438440 // expected to be strictly positive because b_UQ1_hw has its highest bit set
439441 // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
440- let corr_uq1_hw: HalfRep < F > =
441- HalfRep :: < F > :: ZERO . wrapping_sub ( x_uq0_hw. widen_mul ( b_uq1_hw) . hi ( ) ) ;
442+ let corr_uq1_hw: HalfRep < F > = zero_hw. wrapping_sub ( x_uq0_hw. widen_mul ( b_uq1_hw) . hi ( ) ) ;
442443
443444 // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
444445 // obtaining an UQ1.(HW-1) number and proving its highest bit could be
@@ -483,7 +484,7 @@ where
483484 // be not below that value (see g(x) above), so it is safe to decrement just
484485 // once after the final iteration. On the other hand, an effective value of
485486 // divisor changes after this point (from b_hw to b), so adjust here.
486- x_uq0_hw = x_uq0_hw. wrapping_sub ( HalfRep :: < F > :: ONE ) ;
487+ x_uq0_hw = x_uq0_hw. wrapping_sub ( one_hw ) ;
487488
488489 // Error estimations for full-precision iterations are calculated just
489490 // as above, but with U := 2^-W and taking extra decrementing into account.
@@ -548,13 +549,8 @@ where
548549 assert ! ( F :: BITS == 32 , "native full iterations only supports f32" ) ;
549550
550551 for _ in 0 ..full_iterations {
551- let corr_uq1: u32 = 0 . wrapping_sub (
552- ( ( CastInto :: < u32 > :: cast ( x_uq0) as u64 ) * ( CastInto :: < u32 > :: cast ( b_uq1) as u64 ) )
553- >> F :: BITS ,
554- ) as u32 ;
555- x_uq0 = ( ( ( ( CastInto :: < u32 > :: cast ( x_uq0) as u64 ) * ( corr_uq1 as u64 ) ) >> ( F :: BITS - 1 ) )
556- as u32 )
557- . cast ( ) ;
552+ let corr_uq1: F :: Int = zero. wrapping_sub ( x_uq0. widen_mul ( b_uq1) . hi ( ) ) ;
553+ x_uq0 = ( x_uq0. widen_mul ( corr_uq1) >> ( F :: BITS - 1 ) ) . lo ( ) ;
558554 }
559555 }
560556
0 commit comments