@@ -6,7 +6,7 @@ use libc::{c_char, c_uint};
66use rustc_abi as abi;
77use rustc_abi:: { AddressSpace , Align , HasDataLayout , Size , TargetDataLayout , WrappingRange } ;
88use rustc_codegen_ssa:: MemFlags ;
9- use rustc_codegen_ssa:: common:: { IntPredicate , RealPredicate , TypeKind } ;
9+ use rustc_codegen_ssa:: common:: { IntPredicate , RealPredicate , TypeKind , AtomicRmwBinOp } ;
1010use rustc_codegen_ssa:: mir:: operand:: { OperandRef , OperandValue } ;
1111use rustc_codegen_ssa:: mir:: place:: PlaceRef ;
1212use rustc_codegen_ssa:: traits:: * ;
@@ -546,30 +546,13 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
546546
547547 fn atomic_load (
548548 & mut self ,
549- _ty : & ' ll Type ,
549+ ty : & ' ll Type ,
550550 ptr : & ' ll Value ,
551- _order : AtomicOrdering ,
552- _size : Size ,
551+ order : AtomicOrdering ,
552+ size : Size ,
553553 ) -> & ' ll Value {
554- // core seems to think that nvptx has atomic loads, which is not true for NVVM IR,
555- // therefore our only option is to print that this is not supported then trap.
556- // i have heard of cursed things such as emulating this with __threadfence and volatile loads
557- // but that needs to be experimented with in terms of safety and behavior.
558- // NVVM has explicit intrinsics for adding and subtracting floats which we expose elsewhere
559-
560- // TODO(RDambrosio016): is there a way we can just generate a panic with a message instead
561- // of doing this ourselves? since all panics will be aborts, it should be equivalent
562- // let message = "Atomic Loads are not supported in CUDA.\0";
563-
564- // let vprintf = self.get_intrinsic("vprintf");
565- // let formatlist = self.const_str(Symbol::intern(message)).0;
566- // let valist = self.const_null(self.type_void());
567-
568- // self.call(vprintf, &[formatlist, valist], None);
569-
570- let ( ty, f) = self . get_intrinsic ( "llvm.trap" ) ;
571- self . call ( ty, None , None , f, & [ ] , None , None ) ;
572- unsafe { llvm:: LLVMBuildLoad ( self . llbuilder , ptr, unnamed ( ) ) }
554+ // Since for any A, A | 0 = A, and performing atomics on constant memory is UB in Rust, we can abuse or to perform atomic reads.
555+ self . atomic_rmw ( AtomicRmwBinOp :: AtomicOr , ptr, self . const_int ( ty, 0 ) , order)
573556 }
574557
575558 fn load_operand ( & mut self , place : PlaceRef < ' tcx , & ' ll Value > ) -> OperandRef < ' tcx , & ' ll Value > {
@@ -796,24 +779,13 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
796779
797780 fn atomic_store (
798781 & mut self ,
799- _val : & ' ll Value ,
782+ val : & ' ll Value ,
800783 ptr : & ' ll Value ,
801- _order : AtomicOrdering ,
802- _size : Size ,
784+ order : AtomicOrdering ,
785+ size : Size ,
803786 ) {
804- // see comment in atomic_load
805-
806- // let message = "Atomic Stores are not supported in CUDA.\0";
807-
808- // let vprintf = self.get_intrinsic("vprintf");
809- // let formatlist = self.const_str(Symbol::intern(message)).0;
810- // let valist = self.const_null(self.type_void());
811-
812- // self.call(vprintf, &[formatlist, valist], None);
813- self . abort ( ) ;
814- unsafe {
815- llvm:: LLVMBuildLoad ( self . llbuilder , ptr, UNNAMED ) ;
816- }
787+ // We can exchange *ptr with val, and then discard the result.
788+ self . atomic_rmw ( AtomicRmwBinOp :: AtomicXchg , ptr, val, order) ;
817789 }
818790
819791 fn gep ( & mut self , ty : & ' ll Type , ptr : & ' ll Value , indices : & [ & ' ll Value ] ) -> & ' ll Value {
@@ -1195,13 +1167,65 @@ impl<'ll, 'tcx, 'a> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
11951167 }
11961168 fn atomic_rmw (
11971169 & mut self ,
1198- _op : rustc_codegen_ssa :: common :: AtomicRmwBinOp ,
1199- _dst : & ' ll Value ,
1200- _src : & ' ll Value ,
1201- _order : AtomicOrdering ,
1170+ op : AtomicRmwBinOp ,
1171+ dst : & ' ll Value ,
1172+ src : & ' ll Value ,
1173+ order : AtomicOrdering ,
12021174 ) -> & ' ll Value {
1203- // see cmpxchg comment
1204- self . fatal ( "atomic rmw is not supported" )
1175+ if matches ! ( op, AtomicRmwBinOp :: AtomicNand ) {
1176+ self . fatal ( "Atomic NAND not supported yet!" )
1177+ }
1178+ self . atomic_op (
1179+ dst,
1180+ |builder, dst| {
1181+ // We are in a supported address space - just use ordinary atomics
1182+ unsafe {
1183+ llvm:: LLVMBuildAtomicRMW (
1184+ builder. llbuilder ,
1185+ op,
1186+ dst,
1187+ src,
1188+ crate :: llvm:: AtomicOrdering :: from_generic ( order) ,
1189+ 0 ,
1190+ )
1191+ }
1192+ } ,
1193+ |builder, dst| {
1194+ // Local space is only accessible to the current thread.
1195+ // So, there are no synchronization issues, and we can emulate it using a simple load / compare / store.
1196+ let load: & ' ll Value = unsafe { llvm:: LLVMBuildLoad ( builder. llbuilder , dst, UNNAMED ) } ;
1197+ let next_val = match op{
1198+ AtomicRmwBinOp :: AtomicXchg => src,
1199+ AtomicRmwBinOp :: AtomicAdd => builder. add ( load, src) ,
1200+ AtomicRmwBinOp :: AtomicSub => builder. sub ( load, src) ,
1201+ AtomicRmwBinOp :: AtomicAnd => builder. and ( load, src) ,
1202+ AtomicRmwBinOp :: AtomicNand => {
1203+ let and = builder. and ( load, src) ;
1204+ builder. not ( and)
1205+ } ,
1206+ AtomicRmwBinOp :: AtomicOr => builder. or ( load, src) ,
1207+ AtomicRmwBinOp :: AtomicXor => builder. xor ( load, src) ,
1208+ AtomicRmwBinOp :: AtomicMax => {
1209+ let is_src_bigger = builder. icmp ( IntPredicate :: IntSGT , src, load) ;
1210+ builder. select ( is_src_bigger, src, load)
1211+ }
1212+ AtomicRmwBinOp :: AtomicMin => {
1213+ let is_src_smaller = builder. icmp ( IntPredicate :: IntSLT , src, load) ;
1214+ builder. select ( is_src_smaller, src, load)
1215+ }
1216+ AtomicRmwBinOp :: AtomicUMax => {
1217+ let is_src_bigger = builder. icmp ( IntPredicate :: IntUGT , src, load) ;
1218+ builder. select ( is_src_bigger, src, load)
1219+ } ,
1220+ AtomicRmwBinOp :: AtomicUMin => {
1221+ let is_src_smaller = builder. icmp ( IntPredicate :: IntULT , src, load) ;
1222+ builder. select ( is_src_smaller, src, load)
1223+ }
1224+ } ;
1225+ unsafe { llvm:: LLVMBuildStore ( builder. llbuilder , next_val, dst) } ;
1226+ load
1227+ } ,
1228+ )
12051229 }
12061230
12071231 fn atomic_fence (
0 commit comments