Skip to content

Commit 8bddb04

Browse files
Fix swizzle/short on SSE2
1 parent 0367803 commit 8bddb04

File tree

1 file changed

+7
-5
lines changed

1 file changed

+7
-5
lines changed

include/xsimd/arch/xsimd_sse2.hpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,17 +1693,19 @@ namespace xsimd
16931693
// permute within each lane
16941694
constexpr auto mask_lo = detail::mod_shuffle(V0, V1, V2, V3);
16951695
constexpr auto mask_hi = detail::mod_shuffle(V4, V5, V6, V7);
1696-
__m128i lo = _mm_shufflelo_epi16(self, mask_lo);
1697-
__m128i hi = _mm_shufflehi_epi16(self, mask_hi);
1696+
__m128i lol = _mm_shufflelo_epi16(self, mask_lo);
1697+
__m128i loh = _mm_shufflelo_epi16(self, mask_hi);
1698+
__m128i hil = _mm_shufflehi_epi16(self, mask_lo);
1699+
__m128i hih = _mm_shufflehi_epi16(self, mask_hi);
16981700

1699-
__m128i lo_lo = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(lo), _mm_castsi128_pd(lo), _MM_SHUFFLE2(0, 0)));
1700-
__m128i hi_hi = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(hi), _mm_castsi128_pd(hi), _MM_SHUFFLE2(1, 1)));
1701+
__m128i lo = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(lol), _mm_castsi128_pd(loh), _MM_SHUFFLE2(0, 0)));
1702+
__m128i hi = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(hil), _mm_castsi128_pd(hih), _MM_SHUFFLE2(1, 1)));
17011703

17021704
// mask to choose the right lane
17031705
batch_bool_constant<uint16_t, A, (V0 < 4), (V1 < 4), (V2 < 4), (V3 < 4), (V4 < 4), (V5 < 4), (V6 < 4), (V7 < 4)> blend_mask;
17041706

17051707
// blend the two permutes
1706-
return select(blend_mask, batch<uint16_t, A>(lo_lo), batch<uint16_t, A>(hi_hi));
1708+
return select(blend_mask, batch<uint16_t, A>(lo), batch<uint16_t, A>(hi));
17071709
}
17081710

17091711
template <class A, uint16_t V0, uint16_t V1, uint16_t V2, uint16_t V3, uint16_t V4, uint16_t V5, uint16_t V6, uint16_t V7>

0 commit comments

Comments
 (0)