@@ -16156,6 +16156,126 @@ pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
     transmute(vcompresspd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
 }

+/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcompressd))]
+pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
+    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
+}
+
+/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpcompressd))]
+pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
+    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
+}
+
+/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_epi32)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpcompressd))]
+pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
+    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
+}
+
+/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcompressq))]
+pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
+    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
+}
+
+/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpcompressq))]
+pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
+    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
+}
+
+/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_epi64)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpcompressq))]
+pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
+    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
+}
+
+/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcompressps))]
+pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
+    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
+}
+
+/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcompressps))]
+pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
+    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
+}
+
+/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_ps)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcompressps))]
+pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
+    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
+}
+
+/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcompresspd))]
+pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
+    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
+}
+
+/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcompresspd))]
+pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
+    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
+}
+
+/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_pd)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcompresspd))]
+pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
+    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
+}
+
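A minimal usage sketch, not part of this patch: the function name and mask constant below are illustrative, and it assumes AVX-512F is available and the intrinsics above (or `core::arch::x86_64::*`) are in scope. It shows that a compress store packs only the popcount(k) selected lanes contiguously to the destination and leaves the remaining memory untouched.

#[target_feature(enable = "avx512f")]
unsafe fn compressstore_demo() -> [i32; 16] {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let mut out = [0_i32; 16];
    // Bits 1, 3, 5 and 7 of the mask are set, so elements 2, 4, 6 and 8 are packed
    // to the front of `out`; out[4..] keeps its zeros because only four lanes are written.
    _mm512_mask_compressstoreu_epi32(out.as_mut_ptr() as *mut _, 0b0000_0000_1010_1010, a);
    out
}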
 /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi32&expand=2316)
@@ -38007,6 +38127,34 @@ extern "C" {
     #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
     fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

+    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
+    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
+    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
+    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
+
+    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
+    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
+    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
+    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
+
+    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
+    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
+    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
+    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
+
+    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
+    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
+    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
+    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
+    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
+
     #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
     fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
     #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
@@ -51357,6 +51505,138 @@ mod tests {
         assert_eq_m128(r, e);
     }

+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_compressstoreu_epi32() {
+        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let mut r = [0_i32; 16];
+        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_i32; 16]);
+        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
+        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_compressstoreu_epi32() {
+        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let mut r = [0_i32; 8];
+        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_i32; 8]);
+        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
+        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_compressstoreu_epi32() {
+        let a = _mm_setr_epi32(1, 2, 3, 4);
+        let mut r = [0_i32; 4];
+        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_i32; 4]);
+        _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
+        assert_eq!(&r, &[1, 2, 4, 0]);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_compressstoreu_epi64() {
+        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+        let mut r = [0_i64; 8];
+        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_i64; 8]);
+        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
+        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_compressstoreu_epi64() {
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
+        let mut r = [0_i64; 4];
+        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_i64; 4]);
+        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
+        assert_eq!(&r, &[1, 2, 4, 0]);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_compressstoreu_epi64() {
+        let a = _mm_setr_epi64x(1, 2);
+        let mut r = [0_i64; 2];
+        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_i64; 2]);
+        _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
+        assert_eq!(&r, &[2, 0]);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_compressstoreu_ps() {
+        let a = _mm512_setr_ps(
+            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
+            13_f32, 14_f32, 15_f32, 16_f32,
+        );
+        let mut r = [0_f32; 16];
+        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_f32; 16]);
+        _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
+        assert_eq!(
+            &r,
+            &[
+                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
+                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
+            ]
+        );
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_compressstoreu_ps() {
+        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
+        let mut r = [0_f32; 8];
+        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0_f32; 8]);
+        _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
+        assert_eq!(
+            &r,
+            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
+        );
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_compressstoreu_ps() {
+        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
+        let mut r = [0.; 4];
+        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0.; 4]);
+        _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
+        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_compressstoreu_pd() {
+        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+        let mut r = [0.; 8];
+        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0.; 8]);
+        _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
+        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_compressstoreu_pd() {
+        let a = _mm256_setr_pd(1., 2., 3., 4.);
+        let mut r = [0.; 4];
+        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0.; 4]);
+        _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
+        assert_eq!(&r, &[1., 2., 4., 0.]);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm_mask_compressstoreu_pd() {
+        let a = _mm_setr_pd(1., 2.);
+        let mut r = [0.; 2];
+        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
+        assert_eq!(&r, &[0.; 2]);
+        _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
+        assert_eq!(&r, &[2., 0.]);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_expand_epi32() {
         let src = _mm512_set1_epi32(200);