@@ -9388,6 +9388,96 @@ pub unsafe fn _mm_maskz_alignr_epi8(k: __mmask16, a: __m128i, b: __m128i, imm8:
     transmute(simd_select_bitmask(k, r.as_i8x16(), zero))
 }
 
+/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmovswb))]
+pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+    vpmovswbmem(mem_addr, a.as_i16x32(), k);
+}
+
+/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovswb))]
+pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
+}
+
+/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovswb))]
+pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
+}
+
+/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmovwb))]
+pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+    vpmovwbmem(mem_addr, a.as_i16x32(), k);
+}
+
+/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovwb))]
+pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
+}
+
+/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovwb))]
+pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
+#[inline]
+#[target_feature(enable = "avx512bw")]
+#[cfg_attr(test, assert_instr(vpmovuswb))]
+pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+    vpmovuswbmem(mem_addr, a.as_i16x32(), k);
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovuswb))]
+pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
+}
+
+/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at mem_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
+#[inline]
+#[target_feature(enable = "avx512bw,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovuswb))]
+pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
+}
+
 #[allow(improper_ctypes)]
 extern "C" {
     #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
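
The three 512-bit intrinsics added above differ only in how each 16-bit lane is narrowed to a byte. A hedged usage sketch (editorial, not part of the patch; the function name and the runtime-detection wrapper are illustrative) feeds all three the out-of-range value 300 to show the difference:

// Illustrative only; assumes a toolchain that exposes these AVX-512BW
// intrinsics and a CPU that supports them at runtime.
#[cfg(target_arch = "x86_64")]
fn narrow_flavors_demo() {
    if !is_x86_feature_detected!("avx512bw") {
        return;
    }
    unsafe {
        use core::arch::x86_64::*;
        let a = _mm512_set1_epi16(300); // fits in i16, but not in i8/u8
        let (mut sat, mut trunc, mut usat) = ([0i8; 32], [0i8; 32], [0i8; 32]);
        let k: __mmask32 = !0; // all 32 lanes active
        _mm512_mask_cvtsepi16_storeu_epi8(sat.as_mut_ptr(), k, a);
        _mm512_mask_cvtepi16_storeu_epi8(trunc.as_mut_ptr(), k, a);
        _mm512_mask_cvtusepi16_storeu_epi8(usat.as_mut_ptr(), k, a);
        assert_eq!(sat[0], 127); // signed saturation clamps to i8::MAX
        assert_eq!(trunc[0], 44); // truncation keeps the low byte: 300 & 0xFF
        assert_eq!(usat[0], -1); // unsigned saturation gives 255, i.e. u8::MAX as i8
    }
}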
@@ -9594,6 +9684,27 @@ extern "C" {
     fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
     #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
     fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
+
+    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
+    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
+    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
+    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
+    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
+    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
+
+    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
+    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
+    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
+    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
+    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
+    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
+
+    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
+    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
+    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
+    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
+    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
+    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
 }
 
 #[cfg(test)]
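
The extern bindings above map to LLVM's masked narrowing-store intrinsics, whose semantics are not obvious from the signatures alone. A scalar model of the signed-saturating 512-bit variant (an illustrative assumption, not code from this patch); the truncating and unsigned-saturating variants differ only in the narrowing step:

// Scalar model of vpmovswbmem (illustrative): for each 16-bit lane whose
// mask bit is set, write the saturated byte to mem_addr[i]; bytes for
// masked-off lanes are left untouched in memory.
unsafe fn vpmovswbmem_model(mem_addr: *mut i8, a: &[i16; 32], mask: u32) {
    for i in 0..32 {
        if mask & (1 << i) != 0 {
            let b = a[i].clamp(i8::MIN as i16, i8::MAX as i16) as i8;
            mem_addr.add(i).write(b);
        }
    }
}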
@@ -17905,4 +18016,107 @@ mod tests {
         let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
         assert_eq_m128i(r, e);
     }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
+        let a = _mm512_set1_epi16(i16::MAX);
+        let mut r = _mm256_undefined_si256();
+        _mm512_mask_cvtsepi16_storeu_epi8(
+            &mut r as *mut _ as *mut i8,
+            0b11111111_11111111_11111111_11111111,
+            a,
+        );
+        let e = _mm256_set1_epi8(i8::MAX);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
+        let a = _mm256_set1_epi16(i16::MAX);
+        let mut r = _mm_undefined_si128();
+        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        let e = _mm_set1_epi8(i8::MAX);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
+        let a = _mm_set1_epi16(i16::MAX);
+        let mut r = _mm_set1_epi8(0);
+        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        #[rustfmt::skip]
+        let e = _mm_set_epi8(
+            0, 0, 0, 0, 0, 0, 0, 0,
+            i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+        );
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
+        let a = _mm512_set1_epi16(8);
+        let mut r = _mm256_undefined_si256();
+        _mm512_mask_cvtepi16_storeu_epi8(
+            &mut r as *mut _ as *mut i8,
+            0b11111111_11111111_11111111_11111111,
+            a,
+        );
+        let e = _mm256_set1_epi8(8);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
+        let a = _mm256_set1_epi16(8);
+        let mut r = _mm_undefined_si128();
+        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        let e = _mm_set1_epi8(8);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
+        let a = _mm_set1_epi16(8);
+        let mut r = _mm_set1_epi8(0);
+        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw")]
+    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
+        let a = _mm512_set1_epi16(i16::MAX);
+        let mut r = _mm256_undefined_si256();
+        _mm512_mask_cvtusepi16_storeu_epi8(
+            &mut r as *mut _ as *mut i8,
+            0b11111111_11111111_11111111_11111111,
+            a,
+        );
+        let e = _mm256_set1_epi8(u8::MAX as i8);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
+        let a = _mm256_set1_epi16(i16::MAX);
+        let mut r = _mm_undefined_si128();
+        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        let e = _mm_set1_epi8(u8::MAX as i8);
+        assert_eq_m128i(r, e);
+    }
+
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
+        let a = _mm_set1_epi16(i16::MAX);
+        let mut r = _mm_set1_epi8(0);
+        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        #[rustfmt::skip]
+        let e = _mm_set_epi8(
+            0, 0, 0, 0,
+            0, 0, 0, 0,
+            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+        );
+        assert_eq_m128i(r, e);
+    }
 }
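
Every test above uses a full writemask, so the whole destination buffer is overwritten. A hedged extra sketch of partial-mask behavior (editorial, not one of the patch's tests; the buffer and mask are illustrative):

// With a partial mask only the selected lanes are stored; the remaining
// bytes of the buffer keep their previous contents.
unsafe fn partial_mask_demo() {
    use core::arch::x86_64::*;
    let a = _mm512_set1_epi16(i16::MAX);
    let mut buf = [0i8; 32];
    _mm512_mask_cvtsepi16_storeu_epi8(buf.as_mut_ptr(), 0b1111, a); // low 4 lanes only
    for (i, &b) in buf.iter().enumerate() {
        assert_eq!(b, if i < 4 { i8::MAX } else { 0 });
    }
}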