@@ -3,7 +3,9 @@
 #![allow(clippy::needless_return)]
 
 use crate::float::Float;
-use crate::int::{CastInto, DInt, HInt, Int};
+use crate::int::{CastInto, DInt, HInt, Int, MinInt};
+
+use super::HalfRep;
 
 fn div32<F: Float>(a: F, b: F) -> F
 where
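The newly imported `HalfRep` is what lets the rewrite below stop hardcoding `u32` as the half-width working type: it names "the half-width counterpart of `F::Int`" at the type level. A minimal standalone sketch of the idea, using an assumed `DInt` association rather than the crate's actual trait definitions:

```rust
// Sketch only: the real crate presumably ties HalfRep to its DInt trait.
trait DInt {
    type H; // half-width counterpart
}
impl DInt for u64 {
    type H = u32;
}
impl DInt for u32 {
    type H = u16;
}
type HalfRep<I> = <I as DInt>::H;

fn main() {
    let x: HalfRep<u64> = 0xB504_F333_u32; // HalfRep<u64> resolves to u32
    println!("{x:#x}");
}
```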
@@ -454,15 +456,20 @@ where
 
 fn div64<F: Float>(a: F, b: F) -> F
 where
-    u32: CastInto<F::Int>,
     F::Int: CastInto<u32>,
-    i32: CastInto<F::Int>,
     F::Int: CastInto<i32>,
-    u64: CastInto<F::Int>,
+    F::Int: CastInto<HalfRep<F>>,
+    F::Int: From<HalfRep<F>>,
+    F::Int: From<u8>,
     F::Int: CastInto<u64>,
-    i64: CastInto<F::Int>,
     F::Int: CastInto<i64>,
-    F::Int: HInt,
+    F::Int: HInt + DInt,
+    u16: CastInto<F::Int>,
+    i32: CastInto<F::Int>,
+    i64: CastInto<F::Int>,
+    u32: CastInto<F::Int>,
+    u64: CastInto<F::Int>,
+    u64: CastInto<HalfRep<F>>,
 {
     const NUMBER_OF_HALF_ITERATIONS: usize = 3;
     const NUMBER_OF_FULL_ITERATIONS: usize = 1;
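For `f64` (where `F::Int = u64` and `HalfRep<F>` would be `u32`), the new bounds correspond to ordinary widening and truncating integer conversions. A small sketch under that assumption:

```rust
fn main() {
    // Assuming F::Int = u64 and HalfRep<F> = u32, as for f64.
    let half: u32 = 0xB504_F333;
    let full: u64 = u64::from(half); // what `F::Int: From<HalfRep<F>>` enables
    let back: u32 = full as u32; // what `F::Int: CastInto<HalfRep<F>>` enables (truncating)
    assert_eq!(back, half);
    println!("{full:#x}");
}
```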
@@ -471,7 +478,7 @@ where
     let one = F::Int::ONE;
     let zero = F::Int::ZERO;
     let hw = F::BITS / 2;
-    let lo_mask = u64::MAX >> hw;
+    let lo_mask = F::Int::MAX >> hw;
 
     let significand_bits = F::SIGNIFICAND_BITS;
     let max_exponent = F::EXPONENT_MAX;
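The `lo_mask` change is behaviorally identical for a 64-bit representation but no longer bakes in `u64`, which is presumably the point of the genericization. A quick equivalence check:

```rust
fn main() {
    let hw = 32; // F::BITS / 2 for f64
    // Old and new formulations select the same low half for a 64-bit repr:
    assert_eq!(u64::MAX >> hw, 0x0000_0000_FFFF_FFFF_u64);
    // The generic form also extends to wider representations, e.g. 128 bits:
    assert_eq!(u128::MAX >> 64, u64::MAX as u128);
    println!("lo_mask ok");
}
```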
@@ -616,21 +623,23 @@ where
 
     let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 {
         // Starting with (n-1) half-width iterations
-        let b_uq1_hw: u32 =
-            (CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw)) as u32;
+        let b_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(
+            CastInto::<u64>::cast(b_significand) >> (significand_bits + 1 - hw),
+        );
 
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
         // with W0 being either 16 or 32 and W0 <= HW.
         // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
         // b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
 
         // HW is at least 32. Shifting into the highest bits if needed.
-        let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32));
+        let c_hw = (CastInto::<HalfRep<F>>::cast(0x7504F333_u64)).wrapping_shl(hw.wrapping_sub(32));
 
         // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572,
         // so x0 fits to UQ0.HW without wrapping.
-        let x_uq0_hw: u32 = {
-            let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
+        let x_uq0_hw: HalfRep<F> = {
+            let mut x_uq0_hw: HalfRep<F> =
+                c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */);
             // dbg!(x_uq0_hw);
             // An e_0 error is comprised of errors due to
             // * x0 being an inherently imprecise first approximation of 1/b_hw
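To make the constant concrete: in UQ0.32, `0x7504F333` represents about 0.4571, i.e. (3/4 + 1/sqrt(2)) wrapped to [0, 1), and the wrapping subtraction of b/2 yields the initial reciprocal estimate. A worked example for b = 1.5 in plain `u32` (illustrative values, not the generic code path):

```rust
fn main() {
    // UQ0.32 fixed point: a u32 value n stands for the real number n / 2^32.
    let c: u32 = 0x7504F333; // (3/4 + 1/sqrt(2)) - 1 ≈ 0.4571
    let b = 1.5_f64;
    let b_half = (b / 2.0 * 2f64.powi(32)) as u32; // b/2 = 0.75 as UQ0.32
    let x0 = c.wrapping_sub(b_half); // 3/4 + 1/sqrt(2) - b/2, wrapped into [0, 1)
    println!("x0 ≈ {:.4}", x0 as f64 / 2f64.powi(32)); // ≈ 0.7071, near 1/1.5 ≈ 0.6667
}
```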
@@ -661,8 +670,9 @@ where
             // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is
             // expected to be strictly positive because b_UQ1_hw has its highest bit set
             // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1).
-            let corr_uq1_hw: u32 =
-                0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32;
+            let corr_uq1_hw: HalfRep<F> = CastInto::<HalfRep<F>>::cast(zero.wrapping_sub(
+                ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(b_uq1_hw))) >> hw,
+            ));
             // dbg!(corr_uq1_hw);
 
             // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally
@@ -677,7 +687,9 @@ where
             // The fact corr_UQ1_hw was virtually round up (due to result of
             // multiplication being **first** truncated, then negated - to improve
             // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw.
-            x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32;
+            x_uq0_hw = ((F::Int::from(x_uq0_hw)).wrapping_mul(F::Int::from(corr_uq1_hw))
+                >> (hw - 1))
+            .cast();
             // dbg!(x_uq0_hw);
             // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t
             // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after
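Together, the two rewritten statements form one Newton-Raphson step x <- x * (2 - b*x) in wrapping fixed point: the wrapping negation supplies the "2 -" term modulo 2^HW. A standalone demonstration with concrete `u32`/`u64`, continuing the b = 1.5 example:

```rust
fn main() {
    let hw = 32;
    let b_uq1: u32 = 0xC000_0000; // b = 1.5 as UQ1.31 (same bits as b/2 in UQ0.32)
    let mut x: u32 = 0xB504_F333; // x0 ≈ 0.7071 as UQ0.32
    for _ in 0..3 {
        // corr = 2 - b*x as UQ1.31: wrapping-negate the truncated product b*x
        let corr = 0u32.wrapping_sub(((x as u64).wrapping_mul(b_uq1 as u64) >> hw) as u32);
        // x * corr, renormalized from UQ0.32 * UQ1.31 back down to UQ0.32
        x = ((x as u64).wrapping_mul(corr as u64) >> (hw - 1)) as u32;
    }
    println!("1/b ≈ {:.9}", x as f64 / 2f64.powi(32)); // converges toward 1/1.5 = 0.6666...
}
```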
@@ -707,7 +719,7 @@ where
             // be not below that value (see g(x) above), so it is safe to decrement just
             // once after the final iteration. On the other hand, an effective value of
             // divisor changes after this point (from b_hw to b), so adjust here.
-            x_uq0_hw.wrapping_sub(1_u32)
+            x_uq0_hw.wrapping_sub(HalfRep::<F>::ONE)
         };
 
         // Error estimations for full-precision iterations are calculated just
@@ -717,7 +729,7 @@ where
         // Simulating operations on a twice_rep_t to perform a single final full-width
         // iteration. Using ad-hoc multiplication implementations to take advantage
         // of particular structure of operands.
-        let blo: u64 = (CastInto::<u64>::cast(b_uq1)) & lo_mask;
+        let blo: F::Int = b_uq1 & lo_mask;
         // x_UQ0 = x_UQ0_hw * 2^HW - 1
         // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1
         //
@@ -726,19 +738,20 @@ where
         //   + [ x_UQ0_hw * blo ]
         //   - [      b_UQ1     ]
         // = [ result ][.... discarded ...]
-        let corr_uq1 = negate_u64(
-            (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1,
-        ); // account for *possible* carry
-        let lo_corr = corr_uq1 & lo_mask;
-        let hi_corr = corr_uq1 >> hw;
+        let corr_uq1: F::Int = (F::Int::from(x_uq0_hw) * F::Int::from(b_uq1_hw)
+            + ((F::Int::from(x_uq0_hw) * blo) >> hw))
+        .wrapping_sub(one)
+        .wrapping_neg(); // account for *possible* carry
+        let lo_corr: F::Int = corr_uq1 & lo_mask;
+        let hi_corr: F::Int = corr_uq1 >> hw;
         // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1
-        let mut x_uq0: <F as Float>::Int = ((((x_uq0_hw as u64) * hi_corr) << 1)
-            .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1))
-            .wrapping_sub(2))
-        .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1
-                 // 1 to account for possible carry
-                 // Just like the case of half-width iterations but with possibility
-                 // of overflowing by one extra Ulp of x_UQ0.
+        let mut x_uq0: F::Int = ((F::Int::from(x_uq0_hw) * hi_corr) << 1)
+            .wrapping_add((F::Int::from(x_uq0_hw) * lo_corr) >> (hw - 1))
+            .wrapping_sub(F::Int::from(2u8));
+        // 1 to account for the highest bit of corr_UQ1 can be 1
+        // 1 to account for possible carry
+        // Just like the case of half-width iterations but with possibility
+        // of overflowing by one extra Ulp of x_UQ0.
         x_uq0 -= one;
         // ... and then traditional fixup by 2 should work
 
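The rewritten block still assembles a double-width product from half-word pieces, only now in terms of `F::Int`. The identity it relies on can be checked against a genuine 128-bit product, here for the simplified case where the low half of x is zero (the real code additionally handles the `- 1` low half and the possible carry noted in its comments):

```rust
fn main() {
    // If x = x_hw * 2^32 (low half zero), then the top 64 bits of the
    // 128-bit product x * y equal x_hw*y_hi + ((x_hw*y_lo) >> 32).
    let x_hw: u64 = 0xB504_F333;
    let y: u64 = 0xDEAD_BEEF_CAFE_F00D;
    let (y_hi, y_lo) = (y >> 32, y & 0xFFFF_FFFF);
    let via_halves = x_hw * y_hi + ((x_hw * y_lo) >> 32);
    let exact = ((((x_hw as u128) << 32) * y as u128) >> 64) as u64;
    assert_eq!(via_halves, exact);
    println!("high half: {via_halves:#x}");
}
```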
@@ -755,8 +768,8 @@ where
         x_uq0
     } else {
         // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n
-        let c: <F as Float>::Int = (0x7504F333 << (F::BITS - 32)).cast();
-        let x_uq0: <F as Float>::Int = c.wrapping_sub(b_uq1);
+        let c: F::Int = (0x7504F333 << (F::BITS - 32)).cast();
+        let x_uq0: F::Int = c.wrapping_sub(b_uq1);
         // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64
         x_uq0
     };
@@ -806,7 +819,7 @@ where
     // Now 1/b - (2*P) * 2^-W < x < 1/b
     // FIXME Is x_UQ0 still >= 0.5?
 
-    let mut quotient: <F as Float>::Int = x_uq0.widen_mul(a_significand << 1).hi();
+    let mut quotient: F::Int = x_uq0.widen_mul(a_significand << 1).hi();
     // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W).
 
     // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1),
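`widen_mul(..).hi()` computes the full double-width product and keeps its high half; for `F::Int = u64` that plausibly amounts to a round trip through `u128` (a sketch of the shape, not the crate's actual `HInt` implementation):

```rust
fn main() {
    // High half of a 64x64 -> 128-bit product, the shape of widen_mul(..).hi().
    fn widen_mul_hi(a: u64, b: u64) -> u64 {
        ((a as u128 * b as u128) >> 64) as u64
    }
    let x: u64 = 0xAAAA_AAAA_AAAA_AAAA; // ≈ 1/1.5 as UQ0.64
    let a: u64 = 0xA000_0000_0000_0000; // 1.25 as UQ1.63
    // UQ0.64 * UQ1.63, keeping the high word, stays UQ1.63: ≈ 1.25/1.5
    println!("{:.4}", widen_mul_hi(x, a) as f64 / 2f64.powi(63)); // ≈ 0.8333
}
```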
@@ -868,7 +881,7 @@ where
     // r = a - b * q
     let abs_result = if written_exponent > 0 {
         let mut ret = quotient & significand_mask;
-        ret |= ((written_exponent as u64) << significand_bits).cast();
+        ret |= written_exponent.cast() << significand_bits;
         residual <<= 1;
         ret
     } else {
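The simplified `ret |=` line shifts the biased exponent into place directly in `F::Int` rather than detouring through `u64`. For `f64` the packing it performs looks like this (illustrative field values, assuming the ≈0.8333 quotient from above):

```rust
fn main() {
    // Pack |biased exponent|fraction| into an f64, mirroring
    // `ret |= written_exponent.cast() << significand_bits`.
    let significand_bits = 52;
    let written_exponent: u64 = 1022; // biased exponent of 2^-1
    let fraction: u64 = 0xA_AAAA_AAAA_AAAA; // 52 fraction bits of 1.6666...
    let bits = (written_exponent << significand_bits) | fraction;
    println!("{}", f64::from_bits(bits)); // ≈ 0.8333 = 1.6666... * 2^-1
}
```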