@@ -735,6 +735,117 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
735735 }
736736 }
737737
738+ "llvm.x86.fma.vfmaddsub.ps"
739+ | "llvm.x86.fma.vfmaddsub.pd"
740+ | "llvm.x86.fma.vfmaddsub.ps.256"
741+ | "llvm.x86.fma.vfmaddsub.pd.256" => {
742+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
743+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
744+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
745+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
746+ intrinsic_args ! ( fx, args => ( a, b, c) ; intrinsic) ;
747+
748+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
749+ assert_eq ! ( a. layout( ) , c. layout( ) ) ;
750+ let layout = a. layout ( ) ;
751+
752+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
753+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
754+ assert ! ( lane_ty. is_floating_point( ) ) ;
755+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
756+ assert_eq ! ( lane_count, ret_lane_count) ;
757+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
758+
759+ for idx in 0 ..lane_count {
760+ let a_lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
761+ let b_lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
762+ let c_lane = c. value_lane ( fx, idx) . load_scalar ( fx) ;
763+
764+ let mul = fx. bcx . ins ( ) . fmul ( a_lane, b_lane) ;
765+ let res = if idx & 1 == 0 {
766+ fx. bcx . ins ( ) . fsub ( mul, c_lane)
767+ } else {
768+ fx. bcx . ins ( ) . fadd ( mul, c_lane)
769+ } ;
770+
771+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
772+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
773+ }
774+ }
775+
776+ "llvm.x86.fma.vfmsubadd.ps"
777+ | "llvm.x86.fma.vfmsubadd.pd"
778+ | "llvm.x86.fma.vfmsubadd.ps.256"
779+ | "llvm.x86.fma.vfmsubadd.pd.256" => {
780+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
781+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
782+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
783+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
784+ intrinsic_args ! ( fx, args => ( a, b, c) ; intrinsic) ;
785+
786+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
787+ assert_eq ! ( a. layout( ) , c. layout( ) ) ;
788+ let layout = a. layout ( ) ;
789+
790+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
791+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
792+ assert ! ( lane_ty. is_floating_point( ) ) ;
793+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
794+ assert_eq ! ( lane_count, ret_lane_count) ;
795+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
796+
797+ for idx in 0 ..lane_count {
798+ let a_lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
799+ let b_lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
800+ let c_lane = c. value_lane ( fx, idx) . load_scalar ( fx) ;
801+
802+ let mul = fx. bcx . ins ( ) . fmul ( a_lane, b_lane) ;
803+ let res = if idx & 1 == 0 {
804+ fx. bcx . ins ( ) . fadd ( mul, c_lane)
805+ } else {
806+ fx. bcx . ins ( ) . fsub ( mul, c_lane)
807+ } ;
808+
809+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
810+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
811+ }
812+ }
813+
814+ "llvm.x86.fma.vfnmadd.ps"
815+ | "llvm.x86.fma.vfnmadd.pd"
816+ | "llvm.x86.fma.vfnmadd.ps.256"
817+ | "llvm.x86.fma.vfnmadd.pd.256" => {
818+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
819+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
820+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
821+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
822+ intrinsic_args ! ( fx, args => ( a, b, c) ; intrinsic) ;
823+
824+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
825+ assert_eq ! ( a. layout( ) , c. layout( ) ) ;
826+ let layout = a. layout ( ) ;
827+
828+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
829+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
830+ assert ! ( lane_ty. is_floating_point( ) ) ;
831+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
832+ assert_eq ! ( lane_count, ret_lane_count) ;
833+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
834+
835+ for idx in 0 ..lane_count {
836+ let a_lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
837+ let b_lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
838+ let c_lane = c. value_lane ( fx, idx) . load_scalar ( fx) ;
839+
840+ let mul = fx. bcx . ins ( ) . fmul ( a_lane, b_lane) ;
841+ let neg_mul = fx. bcx . ins ( ) . fneg ( mul) ;
842+ let res = fx. bcx . ins ( ) . fadd ( neg_mul, c_lane) ;
843+
844+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
845+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
846+ }
847+ }
848+
738849 "llvm.x86.sse42.pcmpestri128" => {
739850 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
740851 intrinsic_args ! ( fx, args => ( a, la, b, lb, _imm8) ; intrinsic) ;
0 commit comments