@@ -805,13 +805,42 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
805805 }
806806
807807 fn fptoui_sat ( & mut self , val : & ' ll Value , dest_ty : & ' ll Type ) -> & ' ll Value {
808- // NVVM does not have support for saturated conversion. Setting rustc flag
809- // `-Z saturating_float_casts=false` falls back to non-saturated, UB-prone
810- // conversion, and should prevent this codegen. Otherwise, fall back to UB
811- // prone conversion.
812- self . cx ( ) . sess ( ) . dcx ( )
813- . warn ( "Saturated float to int conversion is not supported on NVVM. Defaulting to UB prone conversion." ) ;
814- self . fptoui ( val, dest_ty)
808+ // NVVM does not support saturating casts, however, they are relatively simple to implement.
809+ // (at least for unsigned ints). So, we emulate them here.
810+
811+ // In order to clamp the value, we need to know it's type.
812+ let val_ty = self . val_ty ( val) ;
813+ // Find the min / max intrinsics
814+ let ( min, max) = match self . cx ( ) . float_width ( val_ty) {
815+ 64 => ( "__nv_fmin" , "__nv_fmax" ) ,
816+ 32 => ( "__nv_fminf" , "__nv_fmaxf" ) ,
817+ _ => {
818+ self . cx ( ) . sess ( ) . dcx ( )
819+ . warn ( "Saturated float to int conversion is not supported in NVVM for type {val_ty:?}. Defaulting to UB prone conversion." ) ;
820+ return self . fptoui ( val, dest_ty) ;
821+ }
822+ } ;
823+ let ( max_ty, max) = self . cx ( ) . get_intrinsic ( max) ;
824+ let ( min_ty, min) = self . cx ( ) . get_intrinsic ( min) ;
825+ // Find the zero value, and the max value of a given int.
826+ let zero = self . const_real ( val_ty, 0.0 ) ;
827+ let max_value = match self . int_width ( dest_ty) {
828+ 8 => u8:: MAX as f64 ,
829+ 16 => u16:: MAX as f64 ,
830+ 32 => u32:: MAX as f64 ,
831+ 64 => u64:: MAX as f64 ,
832+ 128 => u128:: MAX as f64 ,
833+ _ => todo ! ( "Unsupported int type {dest_ty:?}" ) ,
834+ } ;
835+ let max_value = self . const_real ( val_ty, max_value) ;
836+ // Compute max(val, 0). This will clamp negative values to zero **AND**
837+ // replace NaNs with 0s(just like how Rust is specified to behave)
838+ let res = self . call ( max_ty, None , None , max, & [ val, zero] , None , None ) ;
839+ // Clamp all values higher than max to max
840+ let res = self . call ( min_ty, None , None , min, & [ res, max_value] , None , None ) ;
841+ // Now, we know that `res` is non-nan, and in range (min, max). So, it is well-defined
842+ // for all inputs :D!
843+ self . fptoui ( res, dest_ty)
815844 }
816845
817846 fn fptosi_sat ( & mut self , val : & ' ll Value , dest_ty : & ' ll Type ) -> & ' ll Value {
0 commit comments