@@ -1363,6 +1363,7 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
13631363#[ cfg_attr( test, assert_instr( movntdq) ) ]
13641364#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
13651365pub unsafe fn _mm_stream_si128 ( mem_addr : * mut __m128i , a : __m128i ) {
1366+ // See #1541: we use inline asm here to be sure, because the LLVM LangRef isn't clear enough.
13661367 crate :: arch:: asm!(
13671368 vps!( "movntdq" , ",{a}" ) ,
13681369 p = in( reg) mem_addr,
@@ -1390,6 +1391,7 @@ pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
13901391#[ cfg_attr( test, assert_instr( movnti) ) ]
13911392#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
13921393pub unsafe fn _mm_stream_si32 ( mem_addr : * mut i32 , a : i32 ) {
1394+ // See #1541: we use inline asm here to be sure, because the LLVM LangRef isn't clear enough.
13931395 crate :: arch:: asm!(
13941396 vps!( "movnti" , ",{a:e}" ) , // `:e` for 32bit value
13951397 p = in( reg) mem_addr,
@@ -2627,6 +2629,7 @@ pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
26272629#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26282630#[ allow( clippy:: cast_ptr_alignment) ]
26292631pub unsafe fn _mm_stream_pd ( mem_addr : * mut f64 , a : __m128d ) {
2632+ // See #1541: we use inline asm here to be sure, because the LLVM LangRef isn't clear enough.
26302633 crate :: arch:: asm!(
26312634 vps!( "movntpd" , ",{a}" ) ,
26322635 p = in( reg) mem_addr,
@@ -4070,6 +4073,7 @@ mod tests {
40704073 ) ;
40714074 let mut r = _mm_set1_epi8 ( 0 ) ;
40724075 _mm_maskmoveu_si128 ( a, mask, ptr:: addr_of_mut!( r) as * mut i8 ) ;
4076+ _mm_sfence ( ) ;
40734077 let e = _mm_set_epi8 ( 0 , 0 , 9 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ) ;
40744078 assert_eq_m128i ( r, e) ;
40754079 }
@@ -4106,6 +4110,7 @@ mod tests {
41064110 let a = _mm_setr_epi32 ( 1 , 2 , 3 , 4 ) ;
41074111 let mut r = _mm_undefined_si128 ( ) ;
41084112 _mm_stream_si128 ( ptr:: addr_of_mut!( r) , a) ;
4113+ _mm_sfence ( ) ;
41094114 assert_eq_m128i ( r, a) ;
41104115 }
41114116
@@ -4117,6 +4122,7 @@ mod tests {
41174122 let a: i32 = 7 ;
41184123 let mut mem = boxed:: Box :: < i32 > :: new ( -1 ) ;
41194124 _mm_stream_si32 ( ptr:: addr_of_mut!( * mem) , a) ;
4125+ _mm_sfence ( ) ;
41204126 assert_eq ! ( a, * mem) ;
41214127 }
41224128
@@ -4813,6 +4819,7 @@ mod tests {
48134819 let mut mem = Memory { data : [ -1.0 ; 2 ] } ;
48144820
48154821 _mm_stream_pd ( ptr:: addr_of_mut!( mem. data[ 0 ] ) , a) ;
4822+ _mm_sfence ( ) ;
48164823 for i in 0 ..2 {
48174824 assert_eq ! ( mem. data[ i] , get_m128d( a, i) ) ;
48184825 }
0 commit comments