1919//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
2020
2121use crate :: core_arch:: x86:: * ;
22- use crate :: intrinsics:: simd:: simd_fma;
22+ use crate :: intrinsics:: simd:: { simd_fma, simd_insert, simd_neg} ;
23+ use crate :: intrinsics:: { fmaf32, fmaf64} ;
2324
2425#[ cfg( test) ]
2526use stdarch_test:: assert_instr;
@@ -86,7 +87,7 @@ pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
8687 simd_insert ! (
8788 a,
8889 0 ,
89- _mm_cvtsd_f64( a) . mul_add ( _mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
90+ fmaf64 ( _mm_cvtsd_f64( a) , _mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
9091 )
9192}
9293
@@ -104,7 +105,7 @@ pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
104105 simd_insert ! (
105106 a,
106107 0 ,
107- _mm_cvtss_f32( a) . mul_add ( _mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
108+ fmaf32 ( _mm_cvtss_f32( a) , _mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
108109 )
109110}
110111
@@ -222,7 +223,7 @@ pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
222223 simd_insert ! (
223224 a,
224225 0 ,
225- _mm_cvtsd_f64( a) . mul_add ( _mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
226+ fmaf64 ( _mm_cvtsd_f64( a) , _mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
226227 )
227228}
228229
@@ -240,7 +241,7 @@ pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
240241 simd_insert ! (
241242 a,
242243 0 ,
243- _mm_cvtss_f32( a) . mul_add ( _mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
244+ fmaf32 ( _mm_cvtss_f32( a) , _mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
244245 )
245246}
246247
@@ -358,7 +359,7 @@ pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
358359 simd_insert ! (
359360 a,
360361 0 ,
361- _mm_cvtsd_f64( a) . mul_add ( -_mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
362+ fmaf64 ( _mm_cvtsd_f64( a) , -_mm_cvtsd_f64( b) , _mm_cvtsd_f64( c) )
362363 )
363364}
364365
@@ -376,7 +377,7 @@ pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
376377 simd_insert ! (
377378 a,
378379 0 ,
379- _mm_cvtss_f32( a) . mul_add ( -_mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
380+ fmaf32 ( _mm_cvtss_f32( a) , -_mm_cvtss_f32( b) , _mm_cvtss_f32( c) )
380381 )
381382}
382383
@@ -447,7 +448,7 @@ pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
447448 simd_insert ! (
448449 a,
449450 0 ,
450- _mm_cvtsd_f64( a) . mul_add ( -_mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
451+ fmaf64 ( _mm_cvtsd_f64( a) , -_mm_cvtsd_f64( b) , -_mm_cvtsd_f64( c) )
451452 )
452453}
453454
@@ -466,7 +467,7 @@ pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
466467 simd_insert ! (
467468 a,
468469 0 ,
469- _mm_cvtss_f32( a) . mul_add ( -_mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
470+ fmaf32 ( _mm_cvtss_f32( a) , -_mm_cvtss_f32( b) , -_mm_cvtss_f32( c) )
470471 )
471472}
472473
0 commit comments