@@ -1098,7 +1098,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
10981098/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps)
10991099#[ inline]
11001100#[ target_feature( enable = "sse" ) ]
1101- #[ cfg_attr( test, assert_instr( movmskps) ) ]
1101+ // FIXME: LLVM9 trunk has the following bug:
1102+ // https://github.com/rust-lang/stdarch/issues/794
1103+ // so we only temporarily test this on i686 and x86_64 but not on i586:
1104+ #[ cfg_attr( all( test, target_feature = "sse2" ) , assert_instr( movmskps) ) ]
11021105#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
11031106pub unsafe fn _mm_movemask_ps ( a : __m128 ) -> i32 {
11041107 movmskps ( a)
@@ -1109,21 +1112,7 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
11091112/// from `a`.
11101113#[ inline]
11111114#[ target_feature( enable = "sse" ) ]
1112- #[ cfg_attr(
1113- all(
1114- test,
1115- any(
1116- target_arch = "x86_64" ,
1117- all( target_arch = "x86" , target_feature = "sse2" )
1118- )
1119- ) ,
1120- assert_instr( movhps)
1121- ) ]
1122- // FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps`
1123- #[ cfg_attr(
1124- all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1125- assert_instr( shufps)
1126- ) ]
1115+ #[ cfg_attr( test, assert_instr( movhps) ) ]
11271116// TODO: this function is actually not limited to floats, but that's what
11281117// matches the C type most closely: `(__m128, *const __m64) -> __m128`.
11291118pub unsafe fn _mm_loadh_pi ( a : __m128 , p : * const __m64 ) -> __m128 {
@@ -1137,16 +1126,7 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
11371126/// is copied from the upper half of `a`.
11381127#[ inline]
11391128#[ target_feature( enable = "sse" ) ]
1140- #[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( movlps) ) ]
1141- #[ cfg_attr(
1142- all( test, target_arch = "x86" , target_feature = "sse2" ) ,
1143- assert_instr( movlps)
1144- ) ]
1145- // FIXME: On 32-bit targets without SSE2, it just generates two `movss`...
1146- #[ cfg_attr(
1147- all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1148- assert_instr( movss)
1149- ) ]
1129+ #[ cfg_attr( test, assert_instr( movlps) ) ]
11501130pub unsafe fn _mm_loadl_pi ( a : __m128 , p : * const __m64 ) -> __m128 {
11511131 let q = p as * const f32x2 ;
11521132 let b: f32x2 = * q;
0 commit comments