@@ -727,13 +727,29 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
727727 llvm:: LLVMSetVolatile ( store, llvm:: True ) ;
728728 }
729729 if flags. contains ( MemFlags :: NONTEMPORAL ) {
730- // According to LLVM [1] building a nontemporal store must
731- // *always* point to a metadata value of the integer 1.
732- //
733- // [1]: https://llvm.org/docs/LangRef.html#store-instruction
734- let one = self . cx . const_i32 ( 1 ) ;
735- let node = llvm:: LLVMMDNodeInContext ( self . cx . llcx , & one, 1 ) ;
736- llvm:: LLVMSetMetadata ( store, llvm:: MD_nontemporal as c_uint , node) ;
730+ // Make sure that the current target architecture supports "sane" non-temporal
731+ // stores, i.e., non-temporal stores that are equivalent to regular stores except
732+ // for performance. LLVM doesn't seem to care about this, and will happily treat
733+ // `!nontemporal` stores as-if they were normal stores (for reordering optimizations
734+ // etc) even on x86, despite later lowering them to MOVNT instructions, which do *not*
735+ // behave like regular stores but require special fences.
736+ // So we keep a list of architectures where `!nontemporal` is known to be truly just
737+ // a hint, and use regular stores everywhere else.
738+ // (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
739+ // before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
740+ const WELL_BEHAVED_NONTEMPORAL_ARCHS : & [ & str ] = & [ "aarch64" , "arm" ] ;
741+
742+ let use_nontemporal =
743+ WELL_BEHAVED_NONTEMPORAL_ARCHS . contains ( & & * self . cx . tcx . sess . target . arch ) ;
744+ if use_nontemporal {
745+ // According to LLVM [1] building a nontemporal store must
746+ // *always* point to a metadata value of the integer 1.
747+ //
748+ // [1]: https://llvm.org/docs/LangRef.html#store-instruction
749+ let one = self . cx . const_i32 ( 1 ) ;
750+ let node = llvm:: LLVMMDNodeInContext ( self . cx . llcx , & one, 1 ) ;
751+ llvm:: LLVMSetMetadata ( store, llvm:: MD_nontemporal as c_uint , node) ;
752+ }
737753 }
738754 store
739755 }
0 commit comments