From c837442be6cbc7806ca7c42decb3d4d01c0a12e2 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Wed, 22 Oct 2025 16:52:45 +0530 Subject: [PATCH 1/3] fix: update the mask preprocessing step in _mm_mask_cvtepi16_epi8 --- crates/core_arch/src/x86/avx512bw.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index fadc0e2cc0..e08d149db6 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -10681,7 +10681,7 @@ pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { unsafe { let convert = _mm_cvtepi16_epi8(a).as_i8x16(); - let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; + let k: __mmask16 = 0b11111111_00000000 | k as __mmask16; transmute(simd_select_bitmask(k, convert, src.as_i8x16())) } } @@ -20559,6 +20559,12 @@ mod tests { let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a); let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); assert_eq_m128i(r, e); + + let src = _mm_set_epi8(1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1); + let a = _mm_set1_epi16(3); + let r = _mm_mask_cvtepi16_epi8(src, 0b11011110, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 1, 3, 3, 3, 3, 1); + assert_eq_m128i(r, e); } #[simd_test(enable = "avx512bw,avx512vl")] From 2410faef823343022bb79c7d60f83a843effdccd Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 27 Oct 2025 20:11:06 +0530 Subject: [PATCH 2/3] Comparing rust intrinsics with GCC instead of Clang --- ci/intrinsic-test.sh | 4 +-- crates/intrinsic-test/missing_x86.txt | 48 +++++++++++++-------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index e14a824b2a..9a1e80286c 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -69,10 +69,10 @@ case ${TARGET} in x86_64-unknown-linux-gnu*) TEST_CPPFLAGS="-fuse-ld=lld -I/usr/include/x86_64-linux-gnu/" - TEST_CXX_COMPILER="clang++" + TEST_CXX_COMPILER="g++" TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt - TEST_SAMPLE_INTRINSICS_PERCENTAGE=5 + TEST_SAMPLE_INTRINSICS_PERCENTAGE=100 ;; *) ;; diff --git a/crates/intrinsic-test/missing_x86.txt b/crates/intrinsic-test/missing_x86.txt index 58e37b92a1..ea086bf565 100644 --- a/crates/intrinsic-test/missing_x86.txt +++ b/crates/intrinsic-test/missing_x86.txt @@ -877,28 +877,28 @@ _mm512_mask_floor_ps _xgetbv # Miscellaneous issues that can be fixed first -_kshiftli_mask16 -_kshiftli_mask32 -_kshiftli_mask64 -_kshiftli_mask8 -_kshiftri_mask16 -_kshiftri_mask32 -_kshiftri_mask64 -_kshiftri_mask8 -_mm256_castsi128_si256 -_mm256_extract_epi16 -_mm256_extract_epi8 -_mm512_castsi128_si512 -_mm512_castsi256_si512 +# _kshiftli_mask16 +# _kshiftli_mask32 +# _kshiftli_mask64 +# _kshiftli_mask8 +# _kshiftri_mask16 +# _kshiftri_mask32 +# _kshiftri_mask64 +# _kshiftri_mask8 +# _mm256_castsi128_si256 +# _mm256_extract_epi16 +# _mm256_extract_epi8 +# _mm512_castsi128_si512 +# _mm512_castsi256_si512 # _mm512_conj_pch -_mm512_mask_reduce_max_pd -_mm512_mask_reduce_max_ps -_mm512_mask_reduce_min_pd -_mm512_mask_reduce_min_ps -_mm_comineq_sh -_mm_extract_epi16 -_mm_extract_epi8 -_mm_mask_cvtepi16_epi8 -_mm_mask_cvtpd_epi32 -_mm_mask_cvtpd_ps -_mm_ucomineq_sh \ No newline at end of file +# _mm512_mask_reduce_max_pd +# _mm512_mask_reduce_max_ps +# _mm512_mask_reduce_min_pd +# _mm512_mask_reduce_min_ps +# _mm_comineq_sh +# _mm_extract_epi16 +# _mm_extract_epi8 +# _mm_mask_cvtepi16_epi8 +# _mm_mask_cvtpd_epi32 +# _mm_mask_cvtpd_ps +# _mm_ucomineq_sh \ No newline at end of file From 6fee90d5c53394375cdca3177dcfa57a6cb05d57 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 27 Oct 2025 20:38:05 +0530 Subject: [PATCH 3/3] adding gcc-specific flags --- crates/intrinsic-test/src/arm/compile.rs | 8 +++++--- crates/intrinsic-test/src/common/compile_c.rs | 4 ---- crates/intrinsic-test/src/x86/compile.rs | 12 ++++++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/crates/intrinsic-test/src/arm/compile.rs b/crates/intrinsic-test/src/arm/compile.rs index 7da35f9a11..0e6d699c2b 100644 --- a/crates/intrinsic-test/src/arm/compile.rs +++ b/crates/intrinsic-test/src/arm/compile.rs @@ -18,9 +18,11 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { command = command.add_arch_flags(["faminmax", "lut", "sha3"]); } - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } + command = if !cpp_compiler.contains("clang") { + command.add_extra_flag("-flax-vector-conversions") + } else { + command.add_extra_flag(format!("--target={}", config.target).as_str()) + }; let mut cpp_compiler = command.into_cpp_compilation(); diff --git a/crates/intrinsic-test/src/common/compile_c.rs b/crates/intrinsic-test/src/common/compile_c.rs index fa78b332a7..01e87762f4 100644 --- a/crates/intrinsic-test/src/common/compile_c.rs +++ b/crates/intrinsic-test/src/common/compile_c.rs @@ -84,10 +84,6 @@ impl CompilationCommandBuilder { cpp_compiler.args(self.extra_flags); - if let Some(target) = &self.target { - cpp_compiler.arg(format!("--target={target}")); - } - CppCompilation(cpp_compiler) } } diff --git a/crates/intrinsic-test/src/x86/compile.rs b/crates/intrinsic-test/src/x86/compile.rs index 60997a1278..0770e5b9fb 100644 --- a/crates/intrinsic-test/src/x86/compile.rs +++ b/crates/intrinsic-test/src/x86/compile.rs @@ -33,13 +33,17 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option { "-mavx512vbmi2", "-mavx512vnni", "-mavx512vpopcntdq", - "-ferror-limit=1000", "-std=c++23", ]); - if !cpp_compiler.contains("clang") { - command = command.add_extra_flag("-flax-vector-conversions"); - } + command = if !cpp_compiler.contains("clang") { + command.add_extra_flags(vec!["-fmax-errors=1000", "-flax-vector-conversions"]) + } else { + command.add_extra_flags(vec![ + "-ferror-limit=1000", + format!("--target={}", config.target).as_str(), + ]) + }; let cpp_compiler = command.into_cpp_compilation();