Skip to content

Commit abe9947

Browse files
Provide an MSVC compatible usage of various mask-related intrinsics
Downgrade to AVX2 when MSVC doesn't support the AVX512 builtin. This could be refined by checking the compiler version, but better safe than sorry here.
1 parent 59dd061 commit abe9947

File tree

4 files changed

+50
-11
lines changed

4 files changed

+50
-11
lines changed

include/xsimd/types/xsimd_avx512_float.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,20 @@ namespace xsimd
579579

580580
/**
 * Lane-wise select over 16 single-precision lanes.
 *
 * For every lane, the result takes the value from `a` when the matching
 * bit of `cond` is set and the value from `b` otherwise.
 *
 * @param cond per-lane selection mask (used here as a 16-bit mask).
 * @param a    values picked where the mask bit is set.
 * @param b    values picked where the mask bit is clear.
 * @return the blended batch.
 */
static batch_type select(const batch_bool_type& cond, const batch_type& a, const batch_type& b)
{
#if !defined(_MSC_VER)
    return _mm512_mask_blend_ps(cond, b, a);
#else
    // MSVC path: expand the bit mask into a full-width vector mask
    // (all-ones in selected 32-bit lanes, zero elsewhere) so that the
    // AVX blendv instruction, which keys on each lane's sign bit, can be
    // applied to the two 256-bit halves.
    __m512i mcondi = _mm512_maskz_broadcastd_epi32(static_cast<__mmask16>(cond), _mm_set1_epi32(~0));
    // Reinterpret the integer mask as float lanes with the dedicated cast
    // intrinsic rather than by dereferencing a reinterpret_cast'ed pointer.
    __m512 mcond = _mm512_castsi512_ps(mcondi);
    XSIMD_SPLITPS_AVX512(mcond);
    XSIMD_SPLITPS_AVX512(a);
    XSIMD_SPLITPS_AVX512(b);

    auto res_lo = _mm256_blendv_ps(b_low, a_low, mcond_low);
    auto res_hi = _mm256_blendv_ps(b_high, a_high, mcond_high);

    XSIMD_RETURN_MERGEDPS_AVX(res_lo, res_hi);
#endif
}
584597

585598
static batch_bool_type isnan(const batch_type& x)

include/xsimd/types/xsimd_avx512_int16.hpp

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -316,19 +316,17 @@ namespace xsimd
316316

317317
static batch_type select(const batch_bool_type& cond, const batch_type& a, const batch_type& b)
318318
{
319-
#if defined(XSIMD_AVX512BW_AVAILABLE)
320-
// Some compilers are not happy with passing directly a and b to the intrinsics
321-
// See https://github.com/xtensor-stack/xsimd/issues/315
322-
__m512i ma = a;
323-
__m512i mb = b;
324-
return _mm512_mask_blend_epi16(cond, mb, ma);
319+
#if defined(XSIMD_AVX512BW_AVAILABLE) && !defined(_MSC_VER)
320+
auto res = _mm512_mask_blend_epi16((__mmask32)cond, (__m512i)b, (__m512i)a);
321+
return batch_type(res);
325322
#else
326-
XSIMD_SPLIT_AVX512(cond);
323+
__m512i mcond = _mm512_maskz_broadcastw_epi16((__mmask32)cond, _mm_set1_epi32(~0));
324+
XSIMD_SPLIT_AVX512(mcond);
327325
XSIMD_SPLIT_AVX512(a);
328326
XSIMD_SPLIT_AVX512(b);
329327

330-
auto res_lo = _mm256_blendv_epi8(b_low, a_low, cond_low);
331-
auto res_hi = _mm256_blendv_epi8(b_high, a_high, cond_high);
328+
auto res_lo = _mm256_blendv_epi8(b_low, a_low, mcond_low);
329+
auto res_hi = _mm256_blendv_epi8(b_high, a_high, mcond_high);
332330

333331
XSIMD_RETURN_MERGED_AVX(res_lo, res_hi);
334332
#endif

include/xsimd/types/xsimd_avx512_int64.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,19 @@ namespace xsimd
299299

300300
/**
 * Lane-wise select over 8 64-bit integer lanes: each result lane comes
 * from `a` when the matching bit of `cond` is set, from `b` otherwise.
 */
static batch_type select(const batch_bool_type& cond, const batch_type& a, const batch_type& b)
{
#if defined(_MSC_VER)
    // MSVC path: turn the bit mask into a vector mask (all-ones in the
    // selected 64-bit lanes), then blend each 256-bit half byte-wise.
    __m512i lane_mask = _mm512_maskz_broadcastq_epi64((__mmask8)cond, _mm_set1_epi32(~0));
    XSIMD_SPLIT_AVX512(lane_mask);
    XSIMD_SPLIT_AVX512(a);
    XSIMD_SPLIT_AVX512(b);

    auto lower_half = _mm256_blendv_epi8(b_low, a_low, lane_mask_low);
    auto upper_half = _mm256_blendv_epi8(b_high, a_high, lane_mask_high);

    XSIMD_RETURN_MERGED_AVX(lower_half, upper_half);
#else
    return _mm512_mask_blend_epi64(cond, b, a);
#endif
}
304316
};
305317

include/xsimd/types/xsimd_avx512_int_base.hpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,29 @@ namespace xsimd
1818
{
1919

2020
// Declares two locals, `<avx_name>_low` and `<avx_name>_high`, holding the
// lower and upper 256-bit halves of the 512-bit integer vector `avx_name`.
// In this diff view the lines whose gutter ends in `-` are the pre-commit
// form; the lines whose gutter ends in `+` add an explicit (__m512i) cast on
// the argument (presumably so wrapper types convert explicitly under MSVC,
// per the commit message -- confirm).
// The last statement is left without a terminating `;`, so the invocation
// must supply it.
#define XSIMD_SPLIT_AVX512(avx_name) \
21-
__m256i avx_name##_low = _mm512_castsi512_si256(avx_name); \
22-
__m256i avx_name##_high = _mm512_extracti64x4_epi64(avx_name, 1) \
21+
__m256i avx_name##_low = _mm512_castsi512_si256((__m512i)avx_name); \
22+
__m256i avx_name##_high = _mm512_extracti64x4_epi64((__m512i)avx_name, 1) \
23+
24+
// Declares `<avx_name>_low` and `<avx_name>_high` as the lower and upper
// 256-bit halves of the 512-bit single-precision vector `avx_name`.
// The upper half is extracted through the 64x4 (double) form, bracketed by
// free reinterpreting casts, because `_mm512_extractf32x8_ps` is an AVX512DQ
// intrinsic whereas `_mm512_extractf64x4_pd` only needs AVX512F; the bits
// obtained are the same.
// The last statement has no terminating `;`: the invocation supplies it.
#define XSIMD_SPLITPS_AVX512(avx_name) \
    __m256 avx_name##_low = _mm512_castps512_ps256((__m512)avx_name); \
    __m256 avx_name##_high = _mm256_castpd_ps( \
        _mm512_extractf64x4_pd(_mm512_castps_pd((__m512)avx_name), 1))
27+
28+
#define XSIMD_SPLITPD_AVX512(avx_name) \
29+
__m256d avx_name##_low = _mm512_castpd512_pd256((__m512d)avx_name); \
30+
__m256d avx_name##_high = _mm512_extractf64x4_pd((__m512d)avx_name, 1) \
2331

2432
// Returns, from the enclosing function, the 512-bit integer vector whose
// lower 256 bits are `res_low` and whose upper 256 bits are `res_high`.
// Written as a single `return` expression so that the macro is one
// statement (safe under an unbraced `if`) and does not inject a local named
// `result` into the caller, which would break if an argument were itself
// called `result`.
// No terminating `;`: the invocation supplies it.
#define XSIMD_RETURN_MERGED_AVX(res_low, res_high) \
    return _mm512_inserti64x4(_mm512_castsi256_si512(res_low), res_high, 1)
2735

36+
// Returns, from the enclosing function, the 512-bit single-precision vector
// whose lower 256 bits are `res_low` and whose upper 256 bits are `res_high`.
// The insertion goes through the 64x4 (double) form, bracketed by free
// reinterpreting casts, because `_mm512_insertf32x8` is an AVX512DQ
// intrinsic whereas `_mm512_insertf64x4` only needs AVX512F; the resulting
// bits are the same.
// Written as a single `return` expression so that no local named `result`
// is injected into the caller and the macro stays one statement.
// No terminating `;`: the invocation supplies it.
#define XSIMD_RETURN_MERGEDPS_AVX(res_low, res_high) \
    return _mm512_castpd_ps(_mm512_insertf64x4( \
        _mm512_castps_pd(_mm512_castps256_ps512(res_low)), \
        _mm256_castps_pd(res_high), 1))
39+
40+
// Returns, from the enclosing function, the 512-bit double-precision vector
// whose lower 256 bits are `res_low` and whose upper 256 bits are `res_high`.
// Written as a single `return` expression so that the macro is one
// statement (safe under an unbraced `if`) and does not inject a local named
// `result` into the caller.
// No terminating `;`: the invocation supplies it.
#define XSIMD_RETURN_MERGEDPD_AVX(res_low, res_high) \
    return _mm512_insertf64x4(_mm512_castpd256_pd512(res_low), res_high, 1)
43+
2844
#define XSIMD_APPLY_AVX2_FUNCTION(N, func, avx_lhs, avx_rhs) \
2945
XSIMD_SPLIT_AVX512(avx_lhs); \
3046
XSIMD_SPLIT_AVX512(avx_rhs); \

0 commit comments

Comments
 (0)