@@ -7,6 +7,8 @@ use crate::{
77#[cfg(test)]
88use stdarch_test::assert_instr;
99
10+ use super::avx512f::{vpl, vps};
11+
1012/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
1113///
1214/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
@@ -4237,11 +4239,11 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
42374239pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
42384240 let mut dst: __m512i = src;
42394241 asm!(
4240- "vmovdqu16 {2 }{{{1 }}}, [{0}]" ,
4241- in(reg) mem_addr,
4242- in(kreg) k,
4243- inout(zmm_reg) dst,
4244- options(pure, readonly, nostack)
4242+ vpl!( "vmovdqu16 {dst }{{{k }}}") ,
4243+ p = in(reg) mem_addr,
4244+ k = in(kreg) k,
4245+ dst = inout(zmm_reg) dst,
4246+ options(pure, readonly, nostack)
42454247 );
42464248 dst
42474249}
@@ -4256,11 +4258,11 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
42564258pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
42574259 let mut dst: __m512i;
42584260 asm!(
4259- "vmovdqu16 {2 }{{{1 }}} {{z}}, [{0}]" ,
4260- in(reg) mem_addr,
4261- in(kreg) k,
4262- out(zmm_reg) dst,
4263- options(pure, readonly, nostack)
4261+ vpl!( "vmovdqu16 {dst }{{{k }}} {{z}}") ,
4262+ p = in(reg) mem_addr,
4263+ k = in(kreg) k,
4264+ dst = out(zmm_reg) dst,
4265+ options(pure, readonly, nostack)
42644266 );
42654267 dst
42664268}
@@ -4275,11 +4277,11 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
42754277pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
42764278 let mut dst: __m512i = src;
42774279 asm!(
4278- "vmovdqu8 {2 }{{{1 }}}, [{0}]" ,
4279- in(reg) mem_addr,
4280- in(kreg) k,
4281- inout(zmm_reg) dst,
4282- options(pure, readonly, nostack)
4280+ vpl!( "vmovdqu8 {dst }{{{k }}}") ,
4281+ p = in(reg) mem_addr,
4282+ k = in(kreg) k,
4283+ dst = inout(zmm_reg) dst,
4284+ options(pure, readonly, nostack)
42834285 );
42844286 dst
42854287}
@@ -4294,11 +4296,11 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
42944296pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
42954297 let mut dst: __m512i;
42964298 asm!(
4297- "vmovdqu8 {2 }{{{1 }}} {{z}}, [{0}]" ,
4298- in(reg) mem_addr,
4299- in(kreg) k,
4300- out(zmm_reg) dst,
4301- options(pure, readonly, nostack)
4299+ vpl!( "vmovdqu8 {dst }{{{k }}} {{z}}") ,
4300+ p = in(reg) mem_addr,
4301+ k = in(kreg) k,
4302+ dst = out(zmm_reg) dst,
4303+ options(pure, readonly, nostack)
43024304 );
43034305 dst
43044306}
@@ -4313,11 +4315,11 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
43134315pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
43144316 let mut dst: __m256i = src;
43154317 asm!(
4316- "vmovdqu16 {2 }{{{1 }}}, [{0}]" ,
4317- in(reg) mem_addr,
4318- in(kreg) k,
4319- inout(ymm_reg) dst,
4320- options(pure, readonly, nostack)
4318+ vpl!( "vmovdqu16 {dst }{{{k }}}") ,
4319+ p = in(reg) mem_addr,
4320+ k = in(kreg) k,
4321+ dst = inout(ymm_reg) dst,
4322+ options(pure, readonly, nostack)
43214323 );
43224324 dst
43234325}
@@ -4332,11 +4334,11 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
43324334pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
43334335 let mut dst: __m256i;
43344336 asm!(
4335- "vmovdqu16 {2 }{{{1 }}} {{z}}, [{0}]" ,
4336- in(reg) mem_addr,
4337- in(kreg) k,
4338- out(ymm_reg) dst,
4339- options(pure, readonly, nostack)
4337+ vpl!( "vmovdqu16 {dst }{{{k }}} {{z}}") ,
4338+ p = in(reg) mem_addr,
4339+ k = in(kreg) k,
4340+ dst = out(ymm_reg) dst,
4341+ options(pure, readonly, nostack)
43404342 );
43414343 dst
43424344}
@@ -4351,11 +4353,11 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
43514353pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
43524354 let mut dst: __m256i = src;
43534355 asm!(
4354- "vmovdqu8 {2 }{{{1 }}}, [{0}]" ,
4355- in(reg) mem_addr,
4356- in(kreg) k,
4357- inout(ymm_reg) dst,
4358- options(pure, readonly, nostack)
4356+ vpl!( "vmovdqu8 {dst }{{{k }}}") ,
4357+ p = in(reg) mem_addr,
4358+ k = in(kreg) k,
4359+ dst = inout(ymm_reg) dst,
4360+ options(pure, readonly, nostack)
43594361 );
43604362 dst
43614363}
@@ -4370,11 +4372,11 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
43704372pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
43714373 let mut dst: __m256i;
43724374 asm!(
4373- "vmovdqu8 {2 }{{{1 }}} {{z}}, [{0}]" ,
4374- in(reg) mem_addr,
4375- in(kreg) k,
4376- out(ymm_reg) dst,
4377- options(pure, readonly, nostack)
4375+ vpl!( "vmovdqu8 {dst }{{{k }}} {{z}}") ,
4376+ p = in(reg) mem_addr,
4377+ k = in(kreg) k,
4378+ dst = out(ymm_reg) dst,
4379+ options(pure, readonly, nostack)
43784380 );
43794381 dst
43804382}
@@ -4389,11 +4391,11 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
43894391pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
43904392 let mut dst: __m128i = src;
43914393 asm!(
4392- "vmovdqu16 {2 }{{{1 }}}, [{0}]" ,
4393- in(reg) mem_addr,
4394- in(kreg) k,
4395- inout(xmm_reg) dst,
4396- options(pure, readonly, nostack)
4394+ vpl!( "vmovdqu16 {dst }{{{k }}}") ,
4395+ p = in(reg) mem_addr,
4396+ k = in(kreg) k,
4397+ dst = inout(xmm_reg) dst,
4398+ options(pure, readonly, nostack)
43974399 );
43984400 dst
43994401}
@@ -4408,11 +4410,11 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
44084410pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
44094411 let mut dst: __m128i;
44104412 asm!(
4411- "vmovdqu16 {2 }{{{1 }}} {{z}}, [{0}]" ,
4412- in(reg) mem_addr,
4413- in(kreg) k,
4414- out(xmm_reg) dst,
4415- options(pure, readonly, nostack)
4413+ vpl!( "vmovdqu16 {dst }{{{k }}} {{z}}") ,
4414+ p = in(reg) mem_addr,
4415+ k = in(kreg) k,
4416+ dst = out(xmm_reg) dst,
4417+ options(pure, readonly, nostack)
44164418 );
44174419 dst
44184420}
@@ -4427,11 +4429,11 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
44274429pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
44284430 let mut dst: __m128i = src;
44294431 asm!(
4430- "vmovdqu8 {2 }{{{1 }}}, [{0}]" ,
4431- in(reg) mem_addr,
4432- in(kreg) k,
4433- inout(xmm_reg) dst,
4434- options(pure, readonly, nostack)
4432+ vpl!( "vmovdqu8 {dst }{{{k }}}") ,
4433+ p = in(reg) mem_addr,
4434+ k = in(kreg) k,
4435+ dst = inout(xmm_reg) dst,
4436+ options(pure, readonly, nostack)
44354437 );
44364438 dst
44374439}
@@ -4446,11 +4448,11 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
44464448pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
44474449 let mut dst: __m128i;
44484450 asm!(
4449- "vmovdqu8 {2 }{{{1 }}} {{z}}, [{0}]" ,
4450- in(reg) mem_addr,
4451- in(kreg) k,
4452- out(xmm_reg) dst,
4453- options(pure, readonly, nostack)
4451+ vpl!( "vmovdqu8 {dst }{{{k }}} {{z}}") ,
4452+ p = in(reg) mem_addr,
4453+ k = in(kreg) k,
4454+ dst = out(xmm_reg) dst,
4455+ options(pure, readonly, nostack)
44544456 );
44554457 dst
44564458}
@@ -4463,11 +4465,11 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
44634465#[target_feature(enable = "avx512f,avx512bw")]
44644466pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
44654467 asm!(
4466- "vmovdqu16 [{0}]{{{1 }}}, {2}" ,
4467- in(reg) mem_addr,
4468- in(kreg) mask,
4469- in(zmm_reg) a,
4470- options(nostack)
4468+ vps!( "vmovdqu16", "{{{mask }}}, {a}") ,
4469+ p = in(reg) mem_addr,
4470+ mask = in(kreg) mask,
4471+ a = in(zmm_reg) a,
4472+ options(nostack)
44714473 );
44724474}
44734475
@@ -4479,11 +4481,11 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
44794481#[target_feature(enable = "avx512f,avx512bw")]
44804482pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
44814483 asm!(
4482- "vmovdqu8 [{0}]{{{1 }}}, {2}" ,
4483- in(reg) mem_addr,
4484- in(kreg) mask,
4485- in(zmm_reg) a,
4486- options(nostack)
4484+ vps!( "vmovdqu8", "{{{mask }}}, {a}") ,
4485+ p = in(reg) mem_addr,
4486+ mask = in(kreg) mask,
4487+ a = in(zmm_reg) a,
4488+ options(nostack)
44874489 );
44884490}
44894491
@@ -4495,11 +4497,11 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
44954497#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
44964498pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
44974499 asm!(
4498- "vmovdqu16 [{0}]{{{1 }}}, {2}" ,
4499- in(reg) mem_addr,
4500- in(kreg) mask,
4501- in(ymm_reg) a,
4502- options(nostack)
4500+ vps!( "vmovdqu16", "{{{mask }}}, {a}") ,
4501+ p = in(reg) mem_addr,
4502+ mask = in(kreg) mask,
4503+ a = in(ymm_reg) a,
4504+ options(nostack)
45034505 );
45044506}
45054507
@@ -4511,11 +4513,11 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
45114513#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
45124514pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
45134515 asm!(
4514- "vmovdqu8 [{0}]{{{1 }}}, {2}" ,
4515- in(reg) mem_addr,
4516- in(kreg) mask,
4517- in(ymm_reg) a,
4518- options(nostack)
4516+ vps!( "vmovdqu8", "{{{mask }}}, {a}") ,
4517+ p = in(reg) mem_addr,
4518+ mask = in(kreg) mask,
4519+ a = in(ymm_reg) a,
4520+ options(nostack)
45194521 );
45204522}
45214523
@@ -4527,11 +4529,11 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
45274529#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
45284530pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
45294531 asm!(
4530- "vmovdqu16 [{0}]{{{1 }}}, {2}" ,
4531- in(reg) mem_addr,
4532- in(kreg) mask,
4533- in(xmm_reg) a,
4534- options(nostack)
4532+ vps!( "vmovdqu16", "{{{mask }}}, {a}") ,
4533+ p = in(reg) mem_addr,
4534+ mask = in(kreg) mask,
4535+ a = in(xmm_reg) a,
4536+ options(nostack)
45354537 );
45364538}
45374539
@@ -4543,11 +4545,11 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
45434545#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
45444546pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
45454547 asm!(
4546- "vmovdqu8 [{0}]{{{1 }}}, {2}" ,
4547- in(reg) mem_addr,
4548- in(kreg) mask,
4549- in(xmm_reg) a,
4550- options(nostack)
4548+ vps!( "vmovdqu8", "{{{mask }}}, {a}") ,
4549+ p = in(reg) mem_addr,
4550+ mask = in(kreg) mask,
4551+ a = in(xmm_reg) a,
4552+ options(nostack)
45514553 );
45524554}
45534555
0 commit comments