1- // verify that simd mask reductions do not introduce additional bit shift operations
2- //@ revisions: x86 aarch64
3- //@ [x86] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
4- //@ [x86] needs-llvm-components: x86
5- //@ [aarch64] compile-flags: --target=aarch64-unknown-linux-gnu
6- //@ [aarch64] needs-llvm-components: aarch64
7- //@ [aarch64] min-llvm-version: 15.0
1+ // verify that simd masked load does not introduce additional bit shift operations
2+ //@ revisions: x86-avx x86-avx512
3+ //@ [x86-avx] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
4+ //@ [x86-avx] compile-flags: -C target-feature=+avx
5+ //@ [x86-avx] needs-llvm-components: x86
6+ //@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
7+ //@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
8+ //@ [x86-avx512] needs-llvm-components: x86
89//@ assembly-output: emit-asm
910//@ compile-flags: --crate-type=lib -O
1011
@@ -20,29 +21,43 @@ pub trait Sized {}
2021trait Copy { }
2122
2223#[ repr( simd) ]
23- pub struct mask8x16 ( [ i8 ; 16 ] ) ;
24+ pub struct f32x8 ( [ f32 ; 8 ] ) ;
25+
26+ #[ repr( simd) ]
27+ pub struct m32x8 ( [ i32 ; 8 ] ) ;
28+
29+ #[ repr( simd) ]
30+ pub struct f64x4 ( [ f64 ; 4 ] ) ;
31+
32+ #[ repr( simd) ]
33+ pub struct m64x4 ( [ i64 ; 4 ] ) ;
2434
2535extern "rust-intrinsic" {
26- fn simd_reduce_all < T > ( x : T ) -> bool ;
27- fn simd_reduce_any < T > ( x : T ) -> bool ;
36+ fn simd_masked_load < M , P , T > ( mask : M , pointer : P , values : T ) -> T ;
2837}
2938
30- // CHECK-LABEL: mask_reduce_all:
39+ // CHECK-LABEL: load_f32x8
3140#[ no_mangle]
32- pub unsafe fn mask_reduce_all ( m : mask8x16 ) -> bool {
33- // x86: movdqa
34- // x86-NEXT: pmovmskb
35- // aarch64: cmge
36- // aarch64-NEXT: umaxv
37- simd_reduce_all ( m)
41+ pub unsafe fn load_f32x8 ( mask : m32x8 , pointer : * const f32 , output : * mut f32x8 ) {
42+ // x86-avx-NOT: vpslld
43+ // x86-avx: vmovaps ymm0
44+ // x86-avx-NEXT: vmaskmovps
45+ // x86-avx512-NOT: vpslld
46+ // x86-avx512: vpcmpgtd k1
47+ // x86-avx512-NEXT: vmovups ymm0 {k1} {z}
48+ // x86-avx512-NEXT: vmovaps
49+ * output = simd_masked_load ( mask, pointer, f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) )
3850}
3951
40- // CHECK-LABEL: mask_reduce_any:
52+ // CHECK-LABEL: load_f64x4
4153#[ no_mangle]
42- pub unsafe fn mask_reduce_any ( m : mask8x16 ) -> bool {
43- // x86: movdqa
44- // x86-NEXT: pmovmskb
45- // aarch64: cmlt
46- // aarch64-NEXT: umaxv
47- simd_reduce_any ( m)
54+ pub unsafe fn load_f64x4 ( mask : m64x4 , pointer : * const f64 , output : * mut f64x4 ) {
55+ // x86-avx-NOT: vpsllq
56+ // x86-avx: vmovapd
57+ // x86-avx-NEXT: vmaskmovpd ymm0
58+ // x86-avx512-NOT: vpsllq
59+ // x86-avx512: vpcmpgtq k1
60+ // x86-avx512-NEXT: vmovupd ymm0 {k1} {z}
61+ // x86-avx512-NEXT: vmovapd
62+ * output = simd_masked_load ( mask, pointer, f64x4 ( [ 0_f64 , 0_f64 , 0_f64 , 0_f64 ] ) )
4863}