1+ /*
2+
3+ Solving for `a / b`, which is `res = (m_a * 2^p_a) / (m_b * 2^p_b)`.
4+
5+ - Separate the exponent and significand
6+ `res = (m_a / m_b) * 2^(p_a - p_b)`
7+ - Check for early exits
8+ - If `a` or `b` are subnormal, normalize by shifting the mantissa and adjusting the exponent.
9+ - Shift the significand (with implicit bit) fully left so that arithmetic can happen with greater
10+ precision.
11+ - Calculate the reciprocal of `m_b`, called `r_b`
12+ - Multiply: `res = m_a * r_b * 2^(p_a - p_b)`
13+
14+ The most complicated part of this process is calculating the reciprocal.
15+
16+ Note that variables named e.g. `uq0` refer to Q notation. E.g. Q1.31 refers to a fixed-point
17+ number that has 1 integer bit and 31 fractional bits.
18+
19+ */
20+
121use crate :: float:: Float ;
222use crate :: int:: { CastFrom , CastInto , DInt , HInt , Int , MinInt } ;
323
@@ -8,8 +28,9 @@ trait FloatDivision: Float
828where
929 Self :: Int : DInt ,
1030{
11- /// Iterations that are done at half of the float's width
31+ /// Iterations that are done at half of the float's width, as an optimization.
1232 const HALF_ITERATIONS : usize ;
33+
1334 /// Iterations that are done at the full float's width. Must be at least one.
1435 const FULL_ITERATIONS : usize ;
1536
5172 }
5273 }
5374
75+ if Self :: FULL_ITERATIONS < 1 {
76+ panic ! ( "Must have at least one full iteration" ) ;
77+ }
78+
5479 if Self :: BITS == 32 && Self :: HALF_ITERATIONS == 2 && Self :: FULL_ITERATIONS == 1 {
5580 74u16
5681 } else if Self :: BITS == 32 && Self :: HALF_ITERATIONS == 0 && Self :: FULL_ITERATIONS == 3 {
@@ -84,6 +109,18 @@ impl FloatDivision for f64 {
84109 const C_HW : HalfRep < Self > = 0x7504F333 << ( HalfRep :: < Self > :: BITS - 32 ) ;
85110}
86111
// Division tuning parameters for `f128`. Only compiled when the `no-f16-f128` feature is
// not enabled.
112+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
113+ impl FloatDivision for f128 {
// Four iterations at half of the float's width ...
114+ const HALF_ITERATIONS : usize = 4 ;
// ... followed by a single iteration at the full width (at least one is required).
115+ const FULL_ITERATIONS : usize = 1 ;
116+
// The 32-bit constant 0x7504F333 placed in the most significant 32 bits of the half-width
// integer type. 0x7504F333 / 2^32 ~= 0.45710678, i.e. (3/4 + 1/sqrt(2)) - 1 as a 32-bit
// fraction; the same value is used by the `f64` implementation.
117+ const C_HW : HalfRep < Self > = 0x7504F333 << ( HalfRep :: < Self > :: BITS - 32 ) ;
118+ }
119+
120+ extern crate std;
121+ #[ allow( unused) ]
122+ use std:: { dbg, fmt, println} ;
123+
87124fn div < F > ( a : F , b : F ) -> F
88125where
89126 F : FloatDivision ,
@@ -108,6 +145,11 @@ where
108145 u64 : CastInto < F :: Int > ,
109146 u64 : CastInto < HalfRep < F > > ,
110147 u128 : CastInto < F :: Int > ,
148+
149+ // debugging
150+ F :: Int : fmt:: LowerHex ,
151+ F :: Int : fmt:: Display ,
152+ F :: SignedInt : fmt:: Display ,
111153{
112154 let one = F :: Int :: ONE ;
113155 let zero = F :: Int :: ZERO ;
@@ -131,16 +173,17 @@ where
131173 let a_rep = a. repr ( ) ;
132174 let b_rep = b. repr ( ) ;
133175
134- // FIXME(tgross35): use u32/i32 and not `Int` to store exponents, since that is enough for up to
135- // `f256`. This should make f128 div faster.
136176 // Exponent numeric representation, not accounting for bias
137177 let a_exponent = ( a_rep >> significand_bits) & exponent_sat;
138178 let b_exponent = ( b_rep >> significand_bits) & exponent_sat;
139179 let quotient_sign = ( a_rep ^ b_rep) & sign_bit;
140180
141181 let mut a_significand = a_rep & significand_mask;
142182 let mut b_significand = b_rep & significand_mask;
143- let mut scale = 0 ;
183+
184+ // The exponent of our final result in its encoded form
185+ let mut res_exponent: i32 =
186+ i32:: cast_from ( a_exponent) - i32:: cast_from ( b_exponent) + ( exponent_bias as i32 ) ;
144187
145188 // Detect if a or b is zero, denormal, infinity, or NaN.
146189 if a_exponent. wrapping_sub ( one) >= ( exponent_sat - one)
@@ -193,33 +236,35 @@ where
193236 // adjustment.
194237 if a_abs < implicit_bit {
195238 let ( exponent, significand) = F :: normalize ( a_significand) ;
196- scale += exponent;
239+ res_exponent += exponent;
197240 a_significand = significand;
198241 }
199242
200243 // b is denormal. Renormalize it and fold the necessary exponent adjustment into
201244 // `res_exponent`.
202245 if b_abs < implicit_bit {
203246 let ( exponent, significand) = F :: normalize ( b_significand) ;
204- scale -= exponent;
247+ res_exponent -= exponent;
205248 b_significand = significand;
206249 }
207250 }
208251
209- // Set the implicit significand bit. If we fell through from the
252+ // Set the implicit significand bit. If we fell through from the
210253 // denormal path it was already set by normalize( ), but setting it twice
211254 // won't hurt anything.
212255 a_significand |= implicit_bit;
213256 b_significand |= implicit_bit;
214257
215- let mut written_exponent: F :: SignedInt = F :: SignedInt :: from_unsigned (
216- ( a_exponent
217- . wrapping_sub ( b_exponent)
218- . wrapping_add ( scale. cast ( ) ) )
219- . wrapping_add ( exponent_bias. cast ( ) ) ,
258+ println ! ( "a sig: {:#034x}\n b sig: {:#034x}\n a exp: {a_exponent}, b exp: {b_exponent}, written: {res_exponent}" ,
259+ a_significand,
260+ b_significand,
220261 ) ;
262+
263+ // Transform to a fixed-point representation
221264 let b_uq1 = b_significand << ( F :: BITS - significand_bits - 1 ) ;
222265
266+ println ! ( "b_uq1: {:#034x}" , b_uq1) ;
267+
223268 // Align the significand of b as a UQ1.(n-1) fixed-point number in the range
224269 // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax
225270 // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2.
@@ -257,7 +302,9 @@ where
257302 // mode into account!
258303 let mut x_uq0 = if F :: HALF_ITERATIONS > 0 {
259304 // Starting with (n-1) half-width iterations
260- let b_uq1_hw: HalfRep < F > = ( b_significand >> ( significand_bits + 1 - hw) ) . cast ( ) ;
305+ let b_uq1_hw: HalfRep < F > = b_uq1. hi ( ) ;
306+
307+ // Previously computed as: (b_significand >> (significand_bits + 1 - hw)).cast();
261308
262309 // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW
263310 // with W0 being either 16 or 32 and W0 <= HW.
@@ -446,7 +493,7 @@ where
446493 // effectively doubling its value as well as its error estimation.
447494 let residual_lo = ( a_significand << ( significand_bits + 1 ) )
448495 . wrapping_sub ( quotient_uq1. wrapping_mul ( b_significand) ) ;
449- written_exponent -= F :: SignedInt :: ONE ;
496+ res_exponent -= 1 ;
450497 a_significand <<= 1 ;
451498 residual_lo
452499 } else {
@@ -484,29 +531,30 @@ where
484531 // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required)
485532 //
486533 // If we have overflowed the exponent, return infinity
487- if written_exponent >= F :: SignedInt :: cast_from ( exponent_sat) {
534+ if res_exponent >= i32 :: cast_from ( exponent_sat) {
488535 return F :: from_repr ( inf_rep | quotient_sign) ;
489536 }
490537
491538 // Now, quotient <= the correctly-rounded result
492539 // and may need taking NextAfter() up to 3 times (see error estimates above)
493540 // r = a - b * q
494- let mut abs_result = if written_exponent > F :: SignedInt :: ZERO {
541+ let mut abs_result = if res_exponent > 0 {
495542 let mut ret = quotient & significand_mask;
496- ret |= written_exponent . unsigned ( ) << significand_bits;
543+ ret |= F :: Int :: from ( res_exponent as u32 ) << significand_bits;
497544 residual_lo <<= 1 ;
498545 ret
499546 } else {
500- if ( F :: SignedInt :: cast_from ( significand_bits) + written_exponent ) < F :: SignedInt :: ZERO {
547+ if ( ( significand_bits as i32 ) + res_exponent ) < 0 {
501548 return F :: from_repr ( quotient_sign) ;
502549 }
503550
504- let ret = quotient. wrapping_shr ( u32:: cast_from ( written_exponent . wrapping_neg ( ) ) + 1 ) ;
551+ let ret = quotient. wrapping_shr ( u32:: cast_from ( res_exponent . wrapping_neg ( ) ) + 1 ) ;
505552 residual_lo = a_significand
506- . wrapping_shl ( significand_bits. wrapping_add ( CastInto :: < u32 > :: cast ( written_exponent ) ) )
553+ . wrapping_shl ( significand_bits. wrapping_add ( CastInto :: < u32 > :: cast ( res_exponent ) ) )
507554 . wrapping_sub ( ret. wrapping_mul ( b_significand) << 1 ) ;
508555 ret
509556 } ;
557+ dbg ! ( res_exponent) ;
510558
511559 residual_lo += abs_result & one; // tie to even
512560 // conditionally turns the below LT comparison into LTE
@@ -539,6 +587,13 @@ intrinsics! {
539587 div( a, b)
540588 }
541589
// `f128` division intrinsic: forwards both operands to the generic `div` routine and
// returns its result. Compiled only when the `no-f16-f128` feature is not enabled.
// NOTE(review): `avr_skip` and `ppc_alias` are interpreted by the enclosing `intrinsics!`
// macro, which is not visible here. Presumably the symbol is omitted on AVR and exported
// as `__divkf3` on PowerPC; confirm against the macro definition.
590+ #[ avr_skip]
591+ #[ ppc_alias = __divkf3]
592+ #[ cfg( not( feature = "no-f16-f128" ) ) ]
593+ pub extern "C" fn __divtf3( a: f128, b: f128) -> f128 {
594+ div( a, b)
595+ }
596+
542597 #[ cfg( target_arch = "arm" ) ]
543598 pub extern "C" fn __divsf3vfp( a: f32 , b: f32 ) -> f32 {
544599 a / b
0 commit comments