@@ -1144,18 +1144,20 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
11441144/// ```
11451145#[ inline]
11461146#[ target_feature( enable = "sse" ) ]
1147- // TODO: generates MOVHPD if the CPU supports SSE2.
1148- // #[cfg_attr(test, assert_instr(movhps))]
1149- #[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( movhpd) ) ]
1150- // 32-bit codegen does not generate `movhps` or `movhpd`, but instead
1151- // `movsd` followed by `unpcklpd` (or `movss'/`unpcklps` if there's no SSE2).
11521147#[ cfg_attr(
1153- all( test, target_arch = "x86" , target_feature = "sse2" ) ,
1154- assert_instr( movlhps)
1148+ all(
1149+ test,
1150+ any(
1151+ target_arch = "x86_64" ,
1152+ all( target_arch = "x86" , target_feature = "sse2" )
1153+ )
1154+ ) ,
1155+ assert_instr( movhpd)
11551156) ]
1157+ // FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps`
11561158#[ cfg_attr(
11571159 all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
1158- assert_instr( unpcklps )
1160+ assert_instr( shufps )
11591161) ]
11601162// TODO: This function is actually not limited to floats, but that's what
11611163// matches the C type most closely: (__m128, *const __m64) -> __m128
@@ -1202,20 +1204,16 @@ pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
12021204/// ```
12031205#[ inline]
12041206#[ target_feature( enable = "sse" ) ]
1205- // TODO: generates MOVLPD if the CPU supports SSE2.
1206- // #[cfg_attr(test, assert_instr(movlps))]
12071207#[ cfg_attr( all( test, target_arch = "x86_64" ) , assert_instr( movlpd) ) ]
1208- // On 32-bit targets with SSE2, it just generates two `movsd`.
12091208#[ cfg_attr(
12101209 all( test, target_arch = "x86" , target_feature = "sse2" ) ,
1211- assert_instr( movsd )
1210+ assert_instr( movlpd )
12121211) ]
1213- // It should really generate "movlps", but oh well ...
1212+ // FIXME: On 32-bit targets without SSE2, it just generates two `movss` ...
12141213#[ cfg_attr(
12151214 all( test, target_arch = "x86" , not( target_feature = "sse2" ) ) ,
12161215 assert_instr( movss)
12171216) ]
1218- // TODO: Like _mm_loadh_pi, this also isn't limited to floats.
12191217pub unsafe fn _mm_loadl_pi ( a : __m128 , p : * const __m64 ) -> __m128 {
12201218 let q = p as * const f32x2 ;
12211219 let b: f32x2 = * q;
0 commit comments