11/* SPDX-License-Identifier: MIT */
22/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */
33
4- use core:: { f32, f64} ;
5-
6- use super :: super :: fenv:: {
7- FE_INEXACT , FE_TONEAREST , FE_UNDERFLOW , feclearexcept, fegetround, feraiseexcept, fetestexcept,
8- } ;
9- use super :: super :: support:: { DInt , HInt , IntTy } ;
4+ use super :: super :: support:: { DInt , FpResult , HInt , IntTy , Round , Status } ;
105use super :: super :: { CastFrom , CastInto , DFloat , Float , HFloat , Int , MinInt } ;
116
127/// Fused multiply-add that works when there is not a larger float size available. Currently this
138/// is still specialized only for `f64`. Computes `(x * y) + z`.
149#[ cfg_attr( all( test, assert_no_panic) , no_panic:: no_panic) ]
1510pub fn fma < F > ( x : F , y : F , z : F ) -> F
1611where
17- F : Float + FmaHelper ,
12+ F : Float ,
13+ F : CastFrom < F :: SignedInt > ,
14+ F : CastFrom < i8 > ,
15+ F :: Int : HInt ,
16+ u32 : CastInto < F :: Int > ,
17+ {
18+ fma_round ( x, y, z, Round :: Nearest ) . val
19+ }
20+
21+ pub fn fma_round < F > ( x : F , y : F , z : F , _round : Round ) -> FpResult < F >
22+ where
23+ F : Float ,
1824 F : CastFrom < F :: SignedInt > ,
1925 F : CastFrom < i8 > ,
2026 F :: Int : HInt ,
@@ -30,16 +36,16 @@ where
3036
3137 if nx. is_zero_nan_inf ( ) || ny. is_zero_nan_inf ( ) {
3238 // Value will overflow, defer to non-fused operations.
33- return x * y + z;
39+ return FpResult :: ok ( x * y + z) ;
3440 }
3541
3642 if nz. is_zero_nan_inf ( ) {
3743 if nz. is_zero ( ) {
3844 // Empty add component means we only need to multiply.
39- return x * y;
45+ return FpResult :: ok ( x * y) ;
4046 }
4147 // `z` is NaN or infinity, which sets the result.
42- return z ;
48+ return FpResult :: ok ( z ) ;
4349 }
4450
4551 // multiply: r = x * y
@@ -147,7 +153,7 @@ where
147153 }
148154 } else {
149155 // exact +/- 0.0
150- return x * y + z;
156+ return FpResult :: ok ( x * y + z) ;
151157 }
152158
153159 e -= d;
@@ -168,6 +174,8 @@ where
168174 // Unbiased exponent for the maximum value of `r`
169175 let max_pow = F :: BITS - 1 + F :: EXP_BIAS ;
170176
177+ let mut status = Status :: OK ;
178+
171179 if e < -( max_pow as i32 - 2 ) {
172180 // Result is subnormal before rounding
173181 if e == -( max_pow as i32 - 1 ) {
@@ -178,7 +186,9 @@ where
178186
179187 if r == c {
180188 // Min normal after rounding,
181- return r. raise_underflow_as_min_positive ( ) ;
189+ status. set_underflow ( true ) ;
190+ r = F :: MIN_POSITIVE_NORMAL . copysign ( r) ;
191+ return FpResult :: new ( r, status) ;
182192 }
183193
184194 if ( rhi << ( F :: SIG_BITS + 1 ) ) != zero {
@@ -195,7 +205,7 @@ where
195205
196206 // Remove the top bit
197207 r = F :: cast_from ( 2i8 ) * r - c;
198- r += r . raise_underflow_ret_zero ( ) ;
208+ status . set_underflow ( true ) ;
199209 }
200210 } else {
201211 // Only round once when scaled
@@ -212,12 +222,22 @@ where
212222 }
213223
214224 // Use our exponent to scale the final value.
215- super :: scalbn ( r, e)
225+ FpResult :: new ( super :: scalbn ( r, e) , status )
216226}
217227
218228/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
219229/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
220230pub fn fma_wide < F , B > ( x : F , y : F , z : F ) -> F
231+ where
232+ F : Float + HFloat < D = B > ,
233+ B : Float + DFloat < H = F > ,
234+ B :: Int : CastInto < i32 > ,
235+ i32 : CastFrom < i32 > ,
236+ {
237+ fma_wide_round ( x, y, z, Round :: Nearest ) . val
238+ }
239+
240+ pub fn fma_wide_round < F , B > ( x : F , y : F , z : F , round : Round ) -> FpResult < F >
221241where
222242 F : Float + HFloat < D = B > ,
223243 B : Float + DFloat < H = F > ,
@@ -244,24 +264,26 @@ where
244264 // Or the result is exact
245265 || ( result - xy == zb && result - zb == xy)
246266 // Or the mode is something other than round to nearest
247- || fegetround ( ) != FE_TONEAREST
267+ || round != Round :: Nearest
248268 {
249269 let min_inexact_exp = ( B :: EXP_BIAS as i32 + F :: EXP_MIN_SUBNORM ) as u32 ;
250270 let max_inexact_exp = ( B :: EXP_BIAS as i32 + F :: EXP_MIN ) as u32 ;
251271
252- if ( min_inexact_exp..max_inexact_exp) . contains ( & re) && fetestexcept ( FE_INEXACT ) != 0 {
253- feclearexcept ( FE_INEXACT ) ;
254- // prevent `xy + vz` from being CSE'd with `xy + z` above
255- let vz: F = force_eval ! ( z) ;
256- result = xy + vz. widen ( ) ;
257- if fetestexcept ( FE_INEXACT ) != 0 {
258- feraiseexcept ( FE_UNDERFLOW ) ;
272+ let mut status = Status :: OK ;
273+
274+ if ( min_inexact_exp..max_inexact_exp) . contains ( & re) && status. inexact ( ) {
275+ // This branch is never hit; requires previous operations to set a status
276+ status. set_inexact ( false ) ;
277+
278+ result = xy + z. widen ( ) ;
279+ if status. inexact ( ) {
280+ status. set_underflow ( true ) ;
259281 } else {
260- feraiseexcept ( FE_INEXACT ) ;
282+ status . set_inexact ( true ) ;
261283 }
262284 }
263285
264- return result. narrow ( ) ;
286+ return FpResult { val : result. narrow ( ) , status } ;
265287 }
266288
267289 let neg = ui >> ( B :: BITS - 1 ) != IntTy :: < B > :: ZERO ;
@@ -272,7 +294,7 @@ where
272294 ui -= one;
273295 }
274296
275- B :: from_bits ( ui) . narrow ( )
297+ FpResult :: ok ( B :: from_bits ( ui) . narrow ( ) )
276298}
277299
278300/// Representation of `F` that has handled subnormals.
@@ -337,49 +359,13 @@ impl<F: Float> Norm<F> {
337359 }
338360}
339361
340- /// Type-specific helpers that are not needed outside of fma.
341- pub trait FmaHelper {
342- /// Raise underflow and return the minimum positive normal value with the sign of `self`.
343- fn raise_underflow_as_min_positive ( self ) -> Self ;
344- /// Raise underflow and return zero.
345- fn raise_underflow_ret_zero ( self ) -> Self ;
346- }
347-
348- impl FmaHelper for f64 {
349- fn raise_underflow_as_min_positive ( self ) -> Self {
350- /* min normal after rounding, underflow depends
351- * on arch behaviour which can be imitated by
352- * a double to float conversion */
353- let fltmin: f32 = ( hf64 ! ( "0x0.ffffff8p-63" ) * f32:: MIN_POSITIVE as f64 * self ) as f32 ;
354- f64:: MIN_POSITIVE / f32:: MIN_POSITIVE as f64 * fltmin as f64
355- }
356-
357- fn raise_underflow_ret_zero ( self ) -> Self {
358- /* raise underflow portably, such that it
359- * cannot be optimized away */
360- let tiny: f64 = f64:: MIN_POSITIVE / f32:: MIN_POSITIVE as f64 * self ;
361- ( tiny * tiny) * ( self - self )
362- }
363- }
364-
365- #[ cfg( f128_enabled) ]
366- impl FmaHelper for f128 {
367- fn raise_underflow_as_min_positive ( self ) -> Self {
368- f128:: MIN_POSITIVE . copysign ( self )
369- }
370-
371- fn raise_underflow_ret_zero ( self ) -> Self {
372- f128:: ZERO
373- }
374- }
375-
376362#[ cfg( test) ]
377363mod tests {
378364 use super :: * ;
379365
380366 fn spec_test < F > ( )
381367 where
382- F : Float + FmaHelper ,
368+ F : Float ,
383369 F : CastFrom < F :: SignedInt > ,
384370 F : CastFrom < i8 > ,
385371 F :: Int : HInt ,
@@ -401,6 +387,29 @@ mod tests {
401387 #[ test]
402388 fn spec_test_f64 ( ) {
403389 spec_test :: < f64 > ( ) ;
390+
391+ let expect_underflow = [
392+ (
393+ hf64 ! ( "0x1.0p-1070" ) ,
394+ hf64 ! ( "0x1.0p-1070" ) ,
395+ hf64 ! ( "0x1.ffffffffffffp-1023" ) ,
396+ hf64 ! ( "0x0.ffffffffffff8p-1022" ) ,
397+ ) ,
398+ (
399+ // FIXME: we raise underflow but this should only be inexact (based on C and
400+ // `rustc_apfloat`).
401+ hf64 ! ( "0x1.0p-1070" ) ,
402+ hf64 ! ( "0x1.0p-1070" ) ,
403+ hf64 ! ( "-0x1.0p-1022" ) ,
404+ hf64 ! ( "-0x1.0p-1022" ) ,
405+ ) ,
406+ ] ;
407+
408+ for ( x, y, z, res) in expect_underflow {
409+ let FpResult { val, status } = fma_round ( x, y, z, Round :: Nearest ) ;
410+ assert_biteq ! ( val, res) ;
411+ assert_eq ! ( status, Status :: UNDERFLOW ) ;
412+ }
404413 }
405414
406415 #[ test]
0 commit comments