Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions crates/core_arch/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,17 @@ macro_rules! simd_extract {
($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
}

#[allow(unused)]
macro_rules! simd_masked_load {
($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
$crate::intrinsics::simd::simd_masked_load::<_, _, _, { $align }>($mask, $ptr, $default)
};
}

#[allow(unused)]
macro_rules! simd_masked_store {
($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
$crate::intrinsics::simd::simd_masked_store::<_, _, _, { $align }>($mask, $ptr, $default)
};
}
40 changes: 16 additions & 24 deletions crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,8 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_pd())
}

/// Stores packed double-precision (64-bit) floating-point elements from `a`
Expand All @@ -1687,7 +1688,8 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Loads packed double-precision (64-bit) floating-point elements from memory
Expand All @@ -1700,7 +1702,8 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d)
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
maskloadpd(mem_addr as *const i8, mask.as_i64x2())
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_pd())
}

/// Stores packed double-precision (64-bit) floating-point elements from `a`
Expand All @@ -1712,7 +1715,8 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Loads packed single-precision (32-bit) floating-point elements from memory
Expand All @@ -1725,7 +1729,8 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
maskloadps256(mem_addr as *const i8, mask.as_i32x8())
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_ps())
}

/// Stores packed single-precision (32-bit) floating-point elements from `a`
Expand All @@ -1737,7 +1742,8 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Loads packed single-precision (32-bit) floating-point elements from memory
Expand All @@ -1750,7 +1756,8 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256)
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
maskloadps(mem_addr as *const i8, mask.as_i32x4())
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_ps())
}

/// Stores packed single-precision (32-bit) floating-point elements from `a`
Expand All @@ -1762,7 +1769,8 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
Expand Down Expand Up @@ -3147,22 +3155,6 @@ unsafe extern "C" {
fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
#[link_name = "llvm.x86.avx.vpermilvar.pd"]
fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
#[link_name = "llvm.x86.avx.maskload.pd.256"]
fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
#[link_name = "llvm.x86.avx.maskstore.pd.256"]
fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
#[link_name = "llvm.x86.avx.maskload.pd"]
fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
#[link_name = "llvm.x86.avx.maskstore.pd"]
fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
#[link_name = "llvm.x86.avx.maskload.ps.256"]
fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
#[link_name = "llvm.x86.avx.maskstore.ps.256"]
fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
#[link_name = "llvm.x86.avx.maskload.ps"]
fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
#[link_name = "llvm.x86.avx.maskstore.ps"]
fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
#[link_name = "llvm.x86.avx.ldu.dq.256"]
fn vlddqu(mem_addr: *const i8) -> i8x32;
#[link_name = "llvm.x86.avx.rcp.ps.256"]
Expand Down
40 changes: 16 additions & 24 deletions crates/core_arch/src/x86/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1786,7 +1786,8 @@ pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}

/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
Expand All @@ -1799,7 +1800,8 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}

/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
Expand All @@ -1812,7 +1814,8 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}

/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
Expand All @@ -1825,7 +1828,8 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}

/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1838,7 +1842,8 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}

/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1851,7 +1856,8 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i)
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}

/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1864,7 +1870,8 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}

/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
Expand All @@ -1877,7 +1884,8 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i)
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
Expand Down Expand Up @@ -3645,22 +3653,6 @@ unsafe extern "C" {
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
#[link_name = "llvm.x86.avx2.maskload.d"]
fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.maskload.d.256"]
fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.maskload.q"]
fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.maskload.q.256"]
fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.maskstore.d"]
fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
#[link_name = "llvm.x86.avx2.maskstore.d.256"]
fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
#[link_name = "llvm.x86.avx2.maskstore.q"]
fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
#[link_name = "llvm.x86.avx2.mpsadbw"]
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
Expand Down
Loading