@@ -88,47 +88,11 @@ trait FloatDivision: Float
8888where
8989 Self :: Int : DInt ,
9090{
91- // /// Iterations that are done at half of the float's width, done for optimization.
92- // const HALF_ITERATIONS: usize;
93-
94- // /// Iterations that are done at the full float's width. Must be at least one.
95- // const FULL_ITERATIONS: usize = 1;
96-
97- // const USE_NATIVE_FULL_ITERATIONS: bool = size_of::<Self>() < size_of::<*const ()>();
98-
9991 /// C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
10092 /// with W0 being either 16 or 32 and W0 <= HW.
10193 /// That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which
10294 /// b/2 is subtracted to obtain x0) wrapped to [0, 1) range.
10395 const C_HW : HalfRep < Self > ;
104-
105- // const RECIPROCAL_PRECISION: u16 = {
106- // // Do some related configuration validation
107- // if !Self::USE_NATIVE_FULL_ITERATIONS {
108- // if Self::FULL_ITERATIONS != 1 {
109- // panic!("Only a single emulated full iteration is supported");
110- // }
111- // if !(Self::HALF_ITERATIONS > 0) {
112- // panic!("Invalid number of half iterations");
113- // }
114- // }
115-
116- // if Self::FULL_ITERATIONS < 1 {
117- // panic!("Must have at least one full iteration");
118- // }
119-
120- // if Self::BITS == 32 && Self::HALF_ITERATIONS == 2 && Self::FULL_ITERATIONS == 1 {
121- // 74u16
122- // } else if Self::BITS == 32 && Self::HALF_ITERATIONS == 0 && Self::FULL_ITERATIONS == 3 {
123- // 10
124- // } else if Self::BITS == 64 && Self::HALF_ITERATIONS == 3 && Self::FULL_ITERATIONS == 1 {
125- // 220
126- // } else if Self::BITS == 128 && Self::HALF_ITERATIONS == 4 && Self::FULL_ITERATIONS == 1 {
127- // 13922
128- // } else {
129- // panic!("Invalid number of iterations")
130- // }
131- // };
13296}
13397
13498/// Calculate the number of iterations required to get needed precision of a float type.
@@ -144,8 +108,9 @@ const fn calc_iterations<F: Float>() -> (usize, usize) {
144108 // Precision doubles with each iteration
145109 let total_iterations = F :: BITS . ilog2 ( ) as usize - 2 ;
146110
147- if size_of :: < F > ( ) < size_of :: < * const ( ) > ( ) {
148- // No need to use half iterations if math at the half
111+ // If a widening multiply is efficient (the double-width product still fits in a
112+ // pointer-sized integer), there is no reason to use half-width iterations.
113+ if 2 * size_of :: < F > ( ) <= size_of :: < * const ( ) > ( ) {
149114 ( 0 , total_iterations)
150115 } else {
151116 ( total_iterations - 1 , 1 )
@@ -201,9 +166,6 @@ const fn reciprocal_precision<F: Float>() -> u16 {
201166}
202167
203168impl FloatDivision for f32 {
204- // const HALF_ITERATIONS: usize = 0;
205- // const FULL_ITERATIONS: usize = 3;
206-
207169 /// Use 16-bit initial estimation in case we are using half-width iterations
208170 /// for float32 division. This is expected to be useful for some 16-bit
209171 /// targets. Not used by default as it requires performing more work during
@@ -573,15 +535,9 @@ where
573535 x_uq0
574536 } ;
575537
576- if full_iterations > 1 {
577- // Need to use concrete types since `F::Int::D` might not support math. So, restrict to
578- // one type.
579- // assert!(F::BITS == 32, "native full iterations only supports f32");
580-
581- for _ in 0 ..full_iterations {
582- let corr_uq1: F :: Int = zero. wrapping_sub ( x_uq0. widen_mul ( b_uq1) . hi ( ) ) ;
583- x_uq0 = ( x_uq0. widen_mul ( corr_uq1) >> ( F :: BITS - 1 ) ) . lo ( ) ;
584- }
538+ for _ in 0 ..full_iterations {
539+ let corr_uq1: F :: Int = zero. wrapping_sub ( x_uq0. widen_mul ( b_uq1) . hi ( ) ) ;
540+ x_uq0 = ( x_uq0. widen_mul ( corr_uq1) >> ( F :: BITS - 1 ) ) . lo ( ) ;
585541 }
586542
587543 // Finally, account for possible overflow, as explained above.
0 commit comments