@@ -13,6 +13,19 @@ pub fn cbrt(x: f64) -> f64 {
1313 cbrt_round ( x, Round :: Nearest ) . val
1414}
1515
16+ // /// Compute the cube root of the argument.
17+ // #[cfg(f128_enabled)]
18+ // #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
19+ // pub fn cbrtf128(x: f128) -> f128 {
20+ // cbrt_round(x, Round::Nearest).val
21+ // }
22+
23+ /// Correctly rounded cube root.
24+ ///
25+ /// Algorithm:
26+ /// - Minimax initial approximation
27+ /// - `F`-sized newton iteration
28+ /// - `2xF`-sized newton iteration
1629pub fn cbrt_round < F : Float + CbrtHelper > ( x : F , round : Round ) -> FpResult < F >
1730where
1831 F :: Int : CastFrom < u64 > ,
@@ -25,69 +38,67 @@ where
2538 let u1: F = F :: U1 ;
2639 let off = F :: OFF ;
2740
28- /* rm=0 for rounding to nearest, and other values for directed roundings */
2941 let hx = x. to_bits ( ) ;
3042 let mut mant: F :: Int = hx & F :: SIG_MASK ;
3143 let sign: F :: Int = hx & F :: SIGN_MASK ;
3244 let neg = x. is_sign_negative ( ) ;
3345
3446 let mut e: u32 = x. exp ( ) ;
3547
48+ // Handle 0, infinity, NaN, and subnormals
3649 if ( ( e + 1 ) & F :: EXP_SAT ) < 2 {
3750 cold_path ( ) ;
3851
3952 let ix = hx & !F :: SIGN_MASK ;
4053
41- /* 0, inf, nan: we return x + x instead of simply x,
42- to that for x a signaling NaN, it correctly triggers
43- the invalid exception. */
4454 if e == F :: EXP_SAT || ix == zero {
55+ // 0, infinity, NaN; use x + x to trigger exceptions
4556 return FpResult :: ok ( x + x) ;
4657 }
4758
48- let nz = ix. leading_zeros ( ) - 11 ; /* subnormal */
59+ // Normalize subnormals
60+ let nz = ix. leading_zeros ( ) - F :: EXP_BITS ;
4961 mant <<= nz;
5062 mant &= F :: SIG_MASK ;
5163 e = e. wrapping_sub ( nz - 1 ) ;
5264 }
5365
5466 e = e. wrapping_add ( 3072 ) ;
55- let cvt1 = mant | ( F :: Int :: cast_from ( F :: EXP_BIAS ) << F :: SIG_BITS ) ;
56- let mut cvt5 = cvt1 ;
67+ // Set the exponent to 0, z is now [1, 2)
68+ let iz = mant | ( F :: Int :: cast_from ( F :: EXP_BIAS ) << F :: SIG_BITS ) ;
5769
5870 let et: u32 = e / 3 ;
5971 let it: u32 = e % 3 ;
6072
61- /* 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3 */
62- cvt5 += F :: Int :: cast_from ( it ) << F :: SIG_BITS ;
63- cvt5 |= sign;
64- let zz: F = F :: from_bits ( cvt5 ) ;
73+ // 2^(3k+it) <= x < 2^(3k+it+1), with 0 <= it <= 3
74+ // `zz` is `x` reduced to [1, 8)
75+ let izz = ( iz + ( F :: Int :: cast_from ( it ) << F :: SIG_BITS ) ) | sign;
76+ let zz: F = F :: from_bits ( izz ) ;
6577
6678 /* cbrt(x) = cbrt(zz)*2^(et-1365) where 1 <= zz < 8 */
67- let mut isc = F :: ESCALE [ it as usize ] . to_bits ( ) ; // todo: index
68- isc |= sign;
69- let cvt2 = isc;
70- let z: F = F :: from_bits ( cvt1) ;
79+ let isc = F :: ESCALE [ it as usize ] . to_bits ( ) | sign;
80+ let z: F = F :: from_bits ( iz) ;
7181
7282 /* cbrt(zz) = cbrt(z)*isc, where isc encodes 1, 2^(1/3) or 2^(2/3),
7383 and 1 <= z < 2 */
7484 let r: F = F :: ONE / z;
75- let rr: F = r * F :: RSC [ ( ( it as usize ) << 1 ) | neg as usize ] ;
85+ let rr: F = r * F :: RSCALE [ ( ( it as usize ) << 1 ) | neg as usize ] ;
7686 let z2: F = z * z;
7787 let c0: F = F :: C [ 0 ] + z * F :: C [ 1 ] ;
7888 let c2: F = F :: C [ 2 ] + z * F :: C [ 3 ] ;
79- let mut y: F = c0 + z2 * c2;
80- let mut y2: F = y * y;
8189
8290 /* y is an approximation of z^(1/3) */
83- let mut h: F = y2 * ( y * r) - F :: ONE ;
91+ let mut y: F = c0 + z2 * c2;
92+ let mut y2: F = y * y;
8493
8594 /* h determines the error between y and z^(1/3) */
86- y -= ( h * y ) * ( u0 - u1 * h ) ;
95+ let mut h : F = y2 * ( y * r ) - F :: ONE ;
8796
8897 /* The correction y -= (h*y)*(u0 - u1*h) corresponds to a cubic variant
8998 of Newton's method, with the function f(y) = 1-z/y^3. */
90- y *= F :: from_bits ( cvt2) ;
99+ y -= ( h * y) * ( u0 - u1 * h) ;
100+
101+ y *= F :: from_bits ( isc) ;
91102
92103 /* Now y is an approximation of zz^(1/3),
93104 * and rr an approximation of 1/zz. We now perform another iteration of
@@ -194,7 +205,7 @@ pub trait CbrtHelper: Float {
194205 const C : [ Self ; 4 ] ;
195206 const U0 : Self ;
196207 const U1 : Self ;
197- const RSC : [ Self ; 6 ] ;
208+ const RSCALE : [ Self ; 6 ] ;
198209 const OFF : [ Self ; 4 ] ;
199210 const WLIST : [ ( Self , Self ) ; 7 ] ;
200211 const AZMAGIC1 : Self ;
@@ -212,17 +223,23 @@ impl CbrtHelper for f64 {
212223 ] ;
213224
214225 const C : [ Self ; 4 ] = [
226+ // hf64!("0x1.1b850259b99ddp-1"),
227+ // hf64!("0x1.2b9762efeffecp-1"),
228+ // hf64!("-0x1.4af8eb64ea1ecp-3"),
229+ // hf64!("0x1.7590cccfad50bp-6"),
215230 hf64 ! ( "0x1.1b0babccfef9cp-1" ) ,
216231 hf64 ! ( "0x1.2c9a3e94d1da5p-1" ) ,
217232 hf64 ! ( "-0x1.4dc30b1a1ddbap-3" ) ,
218233 hf64 ! ( "0x1.7a8d3e4ec9b07p-6" ) ,
219234 ] ;
220235
236+ // 0.33333333...
221237 const U0 : Self = hf64 ! ( "0x1.5555555555555p-2" ) ;
222238
239+ // 0.22222222...
223240 const U1 : Self = hf64 ! ( "0x1.c71c71c71c71cp-3" ) ;
224241
225- const RSC : [ Self ; 6 ] = [ 1.0 , -1.0 , 0.5 , -0.5 , 0.25 , -0.25 ] ;
242+ const RSCALE : [ Self ; 6 ] = [ 1.0 , -1.0 , 0.5 , -0.5 , 0.25 , -0.25 ] ;
226243
227244 const OFF : [ Self ; 4 ] = [ hf64 ! ( "0x1p-53" ) , 0.0 , 0.0 , 0.0 ] ;
228245
@@ -254,6 +271,51 @@ impl CbrtHelper for f64 {
254271 }
255272}
256273
274+ #[ cfg( f128_enabled) ]
275+ impl CbrtHelper for f128 {
276+ const ESCALE : [ Self ; 3 ] = [
277+ 1.0 ,
278+ hf128 ! ( "0x1.428a2f98d728acf826cc8664b665p+0" ) , /* 2^(1/3) */
279+ hf128 ! ( "0x1.965fea53d6e3c53be1ca3482bf3ap+0" ) , /* 2^(2/3) */
280+ ] ;
281+
282+ const C : [ Self ; 4 ] = [
283+ hf128 ! ( "0x1.1b850223b8bf644fcef50feeced1p-1" ) ,
284+ hf128 ! ( "0x1.2b97635e9e17d5240965cb56dc73p-1" ) ,
285+ hf128 ! ( "-0x1.4af8ec964bbc3767a6cf8ac456cbp-3" ) ,
286+ hf128 ! ( "0x1.7590ceecbb8c4c40d8c5e8b64d6bp-6" ) ,
287+ ] ;
288+
289+ const U0 : Self = 0.3333333333333333333333333333333333333333 ;
290+
291+ const U1 : Self = 0.2222222222222222222222222222222222222222 ;
292+
293+ const RSCALE : [ Self ; 6 ] = [ 1.0 , -1.0 , 0.5 , -0.5 , 0.25 , -0.25 ] ;
294+
295+ const OFF : [ Self ; 4 ] = [ hf128 ! ( "0x1p-53" ) , 0.0 , 0.0 , 0.0 ] ;
296+
297+ // Other rounding modes unsupported for f128
298+ const WLIST : [ ( Self , Self ) ; 7 ] =
299+ [ ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) , ( 0.0 , 0.0 ) ] ;
300+
301+ const AZMAGIC1 : Self = hf128 ! ( "0x1.9b78223aa307cp+1" ) ;
302+ const AZMAGIC2 : Self = hf128 ! ( "0x1.79d15d0e8d59cp+0" ) ;
303+ const AZMAGIC3 : Self = hf128 ! ( "0x1.a202bfc89ddffp+2" ) ;
304+ const AZMAGIC4 : Self = hf128 ! ( "0x1.de87aa837820fp+0" ) ;
305+
306+ fn fma ( self , y : Self , z : Self ) -> Self {
307+ #[ cfg( intrinsics_enabled) ]
308+ {
309+ return unsafe { core:: intrinsics:: fmaf128 ( self , y, z) } ;
310+ }
311+
312+ #[ cfg( not( intrinsics_enabled) ) ]
313+ {
314+ return super :: fmaf128 ( self , y, z) ;
315+ }
316+ }
317+ }
318+
257319#[ cfg( test) ]
258320mod tests {
259321 use super :: * ;
0 commit comments