Implement simd_fma and simd_relaxed_fma in const-eval

sayantn · sayantn · commit c4d6b0b8ea2a · 2025-10-26T22:19:28.000+05:30
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics.rs b/compiler/rustc_const_eval/src/interpret/intrinsics.rs
@@ -25,6 +25,15 @@ use super::{
 };
 use crate::fluent_generated as fluent;
 
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum MulAddType {
+    /// Used with `fma` and `simd_fma`, always uses fused-multiply-add
+    Fused,
+    /// Used with `fmuladd` and `simd_relaxed_fma`, defers to `Machine::float_fuse_mul_add` to
+    /// decide between fma and simple multiply-add (may be nondeterministic, e.g. in Miri)
+    Nondeterministic,
+}
+
 /// Directly returns an `Allocation` containing an absolute path representation of the given type.
 pub(crate) fn alloc_type_name<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> (AllocId, u64) {
     let path = crate::util::type_name(tcx, ty);
@@ -630,14 +639,22 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                 dest,
                 rustc_apfloat::Round::NearestTiesToEven,
             )?,
-            sym::fmaf16 => self.fma_intrinsic::<Half>(args, dest)?,
-            sym::fmaf32 => self.fma_intrinsic::<Single>(args, dest)?,
-            sym::fmaf64 => self.fma_intrinsic::<Double>(args, dest)?,
-            sym::fmaf128 => self.fma_intrinsic::<Quad>(args, dest)?,
-            sym::fmuladdf16 => self.float_muladd_intrinsic::<Half>(args, dest)?,
-            sym::fmuladdf32 => self.float_muladd_intrinsic::<Single>(args, dest)?,
-            sym::fmuladdf64 => self.float_muladd_intrinsic::<Double>(args, dest)?,
-            sym::fmuladdf128 => self.float_muladd_intrinsic::<Quad>(args, dest)?,
+            sym::fmaf16 => self.float_muladd_intrinsic::<Half>(args, dest, MulAddType::Fused)?,
+            sym::fmaf32 => self.float_muladd_intrinsic::<Single>(args, dest, MulAddType::Fused)?,
+            sym::fmaf64 => self.float_muladd_intrinsic::<Double>(args, dest, MulAddType::Fused)?,
+            sym::fmaf128 => self.float_muladd_intrinsic::<Quad>(args, dest, MulAddType::Fused)?,
+            sym::fmuladdf16 => {
+                self.float_muladd_intrinsic::<Half>(args, dest, MulAddType::Nondeterministic)?
+            }
+            sym::fmuladdf32 => {
+                self.float_muladd_intrinsic::<Single>(args, dest, MulAddType::Nondeterministic)?
+            }
+            sym::fmuladdf64 => {
+                self.float_muladd_intrinsic::<Double>(args, dest, MulAddType::Nondeterministic)?
+            }
+            sym::fmuladdf128 => {
+                self.float_muladd_intrinsic::<Quad>(args, dest, MulAddType::Nondeterministic)?
+            }
 
             // Unsupported intrinsic: skip the return_to_block below.
             _ => return interp_ok(false),
@@ -1038,40 +1055,41 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
         interp_ok(())
     }
 
-    fn fma_intrinsic<F>(
-        &mut self,
-        args: &[OpTy<'tcx, M::Provenance>],
-        dest: &PlaceTy<'tcx, M::Provenance>,
-    ) -> InterpResult<'tcx, ()>
+    fn float_muladd<F>(
+        &self,
+        a: Scalar<M::Provenance>,
+        b: Scalar<M::Provenance>,
+        c: Scalar<M::Provenance>,
+        typ: MulAddType,
+    ) -> InterpResult<'tcx, Scalar<M::Provenance>>
     where
         F: rustc_apfloat::Float + rustc_apfloat::FloatConvert<F> + Into<Scalar<M::Provenance>>,
     {
-        let a: F = self.read_scalar(&args[0])?.to_float()?;
-        let b: F = self.read_scalar(&args[1])?.to_float()?;
-        let c: F = self.read_scalar(&args[2])?.to_float()?;
+        let a: F = a.to_float()?;
+        let b: F = b.to_float()?;
+        let c: F = c.to_float()?;
+
+        let fuse = typ == MulAddType::Fused || M::float_fuse_mul_add(self);
 
-        let res = a.mul_add(b, c).value;
+        let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value };
         let res = self.adjust_nan(res, &[a, b, c]);
-        self.write_scalar(res, dest)?;
-        interp_ok(())
+        interp_ok(res.into())
     }
 
     fn float_muladd_intrinsic<F>(
         &mut self,
         args: &[OpTy<'tcx, M::Provenance>],
         dest: &PlaceTy<'tcx, M::Provenance>,
+        typ: MulAddType,
     ) -> InterpResult<'tcx, ()>
     where
         F: rustc_apfloat::Float + rustc_apfloat::FloatConvert<F> + Into<Scalar<M::Provenance>>,
     {
-        let a: F = self.read_scalar(&args[0])?.to_float()?;
-        let b: F = self.read_scalar(&args[1])?.to_float()?;
-        let c: F = self.read_scalar(&args[2])?.to_float()?;
-
-        let fuse = M::float_fuse_mul_add(self);
+        let a = self.read_scalar(&args[0])?;
+        let b = self.read_scalar(&args[1])?;
+        let c = self.read_scalar(&args[2])?;
 
-        let res = if fuse { a.mul_add(b, c).value } else { ((a * b).value + c).value };
-        let res = self.adjust_nan(res, &[a, b, c]);
+        let res = self.float_muladd::<F>(a, b, c, typ)?;
         self.write_scalar(res, dest)?;
         interp_ok(())
     }
diff --git a/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs b/compiler/rustc_const_eval/src/interpret/intrinsics/simd.rs
@@ -1,5 +1,6 @@
 use either::Either;
 use rustc_abi::Endian;
+use rustc_apfloat::ieee::{Double, Single};
 use rustc_apfloat::{Float, Round};
 use rustc_middle::mir::interpret::{InterpErrorKind, UndefinedBehaviorInfo};
 use rustc_middle::ty::FloatTy;
@@ -8,8 +9,8 @@ use rustc_span::{Symbol, sym};
 use tracing::trace;
 
 use super::{
-    ImmTy, InterpCx, InterpResult, Machine, OpTy, PlaceTy, Provenance, Scalar, Size, interp_ok,
-    throw_ub_format,
+    ImmTy, InterpCx, InterpResult, Machine, MulAddType, OpTy, PlaceTy, Provenance, Scalar, Size,
+    interp_ok, throw_ub_format,
 };
 use crate::interpret::Writeable;
 
@@ -701,6 +702,43 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
                     };
                 }
             }
+            sym::simd_fma | sym::simd_relaxed_fma => {
+                // `simd_fma` always uses the fused `mul_add`, whereas `simd_relaxed_fma` lets the
+                // machine choose between the fused `mul_add` and the unfused `a * b + c`
+                let typ = match intrinsic_name {
+                    sym::simd_fma => MulAddType::Fused,
+                    sym::simd_relaxed_fma => MulAddType::Nondeterministic,
+                    _ => unreachable!(),
+                };
+
+                let (a, a_len) = self.project_to_simd(&args[0])?;
+                let (b, b_len) = self.project_to_simd(&args[1])?;
+                let (c, c_len) = self.project_to_simd(&args[2])?;
+                let (dest, dest_len) = self.project_to_simd(&dest)?;
+
+                assert_eq!(dest_len, a_len);
+                assert_eq!(dest_len, b_len);
+                assert_eq!(dest_len, c_len);
+
+                for i in 0..dest_len {
+                    let a = self.read_scalar(&self.project_index(&a, i)?)?;
+                    let b = self.read_scalar(&self.project_index(&b, i)?)?;
+                    let c = self.read_scalar(&self.project_index(&c, i)?)?;
+                    let dest = self.project_index(&dest, i)?;
+
+                    let ty::Float(float_ty) = dest.layout.ty.kind() else {
+                        span_bug!(self.cur_span(), "{} operand is not a float", intrinsic_name)
+                    };
+
+                    let val = match float_ty {
+                        FloatTy::F16 => unimplemented!("f16_f128"),
+                        FloatTy::F32 => self.float_muladd::<Single>(a, b, c, typ)?,
+                        FloatTy::F64 => self.float_muladd::<Double>(a, b, c, typ)?,
+                        FloatTy::F128 => unimplemented!("f16_f128"),
+                    };
+                    self.write_scalar(val, &dest)?;
+                }
+            }
 
             // Unsupported intrinsic: skip the return_to_block below.
             _ => return interp_ok(false),
diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs
@@ -290,7 +290,7 @@ pub trait Machine<'tcx>: Sized {
     }
 
     /// Determines whether the `fmuladd` intrinsics fuse the multiply-add or use separate operations.
-    fn float_fuse_mul_add(_ecx: &mut InterpCx<'tcx, Self>) -> bool;
+    fn float_fuse_mul_add(_ecx: &InterpCx<'tcx, Self>) -> bool;
 
     /// Called before a basic block terminator is executed.
     #[inline]
@@ -676,7 +676,7 @@ pub macro compile_time_machine(<$tcx: lifetime>) {
     }
 
     #[inline(always)]
-    fn float_fuse_mul_add(_ecx: &mut InterpCx<$tcx, Self>) -> bool {
+    fn float_fuse_mul_add(_ecx: &InterpCx<$tcx, Self>) -> bool {
         true
     }
 
diff --git a/src/tools/miri/src/intrinsics/simd.rs b/src/tools/miri/src/intrinsics/simd.rs
@@ -1,5 +1,3 @@
-use rand::Rng;
-use rustc_apfloat::Float;
 use rustc_middle::ty;
 use rustc_middle::ty::FloatTy;
 
@@ -83,62 +81,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     this.write_scalar(val, &dest)?;
                 }
             }
-            "fma" | "relaxed_fma" => {
-                let [a, b, c] = check_intrinsic_arg_count(args)?;
-                let (a, a_len) = this.project_to_simd(a)?;
-                let (b, b_len) = this.project_to_simd(b)?;
-                let (c, c_len) = this.project_to_simd(c)?;
-                let (dest, dest_len) = this.project_to_simd(dest)?;
-
-                assert_eq!(dest_len, a_len);
-                assert_eq!(dest_len, b_len);
-                assert_eq!(dest_len, c_len);
-
-                for i in 0..dest_len {
-                    let a = this.read_scalar(&this.project_index(&a, i)?)?;
-                    let b = this.read_scalar(&this.project_index(&b, i)?)?;
-                    let c = this.read_scalar(&this.project_index(&c, i)?)?;
-                    let dest = this.project_index(&dest, i)?;
-
-                    let fuse: bool = intrinsic_name == "fma"
-                        || (this.machine.float_nondet && this.machine.rng.get_mut().random());
-
-                    // Works for f32 and f64.
-                    // FIXME: using host floats to work around https://github.com/rust-lang/miri/issues/2468.
-                    let ty::Float(float_ty) = dest.layout.ty.kind() else {
-                        span_bug!(this.cur_span(), "{} operand is not a float", intrinsic_name)
-                    };
-                    let val = match float_ty {
-                        FloatTy::F16 => unimplemented!("f16_f128"),
-                        FloatTy::F32 => {
-                            let a = a.to_f32()?;
-                            let b = b.to_f32()?;
-                            let c = c.to_f32()?;
-                            let res = if fuse {
-                                a.mul_add(b, c).value
-                            } else {
-                                ((a * b).value + c).value
-                            };
-                            let res = this.adjust_nan(res, &[a, b, c]);
-                            Scalar::from(res)
-                        }
-                        FloatTy::F64 => {
-                            let a = a.to_f64()?;
-                            let b = b.to_f64()?;
-                            let c = c.to_f64()?;
-                            let res = if fuse {
-                                a.mul_add(b, c).value
-                            } else {
-                                ((a * b).value + c).value
-                            };
-                            let res = this.adjust_nan(res, &[a, b, c]);
-                            Scalar::from(res)
-                        }
-                        FloatTy::F128 => unimplemented!("f16_f128"),
-                    };
-                    this.write_scalar(val, &dest)?;
-                }
-            }
             "expose_provenance" => {
                 let [op] = check_intrinsic_arg_count(args)?;
                 let (op, op_len) = this.project_to_simd(op)?;
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
@@ -1324,8 +1324,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
     }
 
     #[inline(always)]
-    fn float_fuse_mul_add(ecx: &mut InterpCx<'tcx, Self>) -> bool {
-        ecx.machine.float_nondet && ecx.machine.rng.get_mut().random()
+    fn float_fuse_mul_add(ecx: &InterpCx<'tcx, Self>) -> bool {
+        ecx.machine.float_nondet && ecx.machine.rng.borrow_mut().random()
     }
 
     #[inline(always)]

Original file line number	Diff line number	Diff line change
`@@ -290,7 +290,7 @@ pub trait Machine<'tcx>: Sized {`
`290`	`290`	`}`
`291`	`291`
`292`	`292`	`` /// Determines whether the `fmuladd` intrinsics fuse the multiply-add or use separate operations. ``
`293`		`- fn float_fuse_mul_add(_ecx: &mut InterpCx<'tcx, Self>) -> bool;`
	`293`	`+ fn float_fuse_mul_add(_ecx: &InterpCx<'tcx, Self>) -> bool;`
`294`	`294`
`295`	`295`	`/// Called before a basic block terminator is executed.`
`296`	`296`	`#[inline]`
`@@ -676,7 +676,7 @@ pub macro compile_time_machine(<$tcx: lifetime>) {`
`676`	`676`	`}`
`677`	`677`
`678`	`678`	`#[inline(always)]`
`679`		`- fn float_fuse_mul_add(_ecx: &mut InterpCx<$tcx, Self>) -> bool {`
	`679`	`+ fn float_fuse_mul_add(_ecx: &InterpCx<$tcx, Self>) -> bool {`
`680`	`680`	`true`
`681`	`681`	`}`
`682`	`682`
Original file line number	Diff line number	Diff line change
`@@ -1324,8 +1324,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {`
`1324`	`1324`	`}`
`1325`	`1325`
`1326`	`1326`	`#[inline(always)]`
`1327`		`- fn float_fuse_mul_add(ecx: &mut InterpCx<'tcx, Self>) -> bool {`
`1328`		`- ecx.machine.float_nondet && ecx.machine.rng.get_mut().random()`
	`1327`	`+ fn float_fuse_mul_add(ecx: &InterpCx<'tcx, Self>) -> bool {`
	`1328`	`+ ecx.machine.float_nondet && ecx.machine.rng.borrow_mut().random()`
`1329`	`1329`	`}`
`1330`	`1330`
`1331`	`1331`	`#[inline(always)]`