99//@ assembly-output: emit-asm
1010//@ compile-flags: --crate-type=lib -Copt-level=3 -C panic=abort
1111
12- #![ feature( no_core, lang_items, repr_simd, intrinsics) ]
12+ #![ feature( no_core, lang_items, repr_simd, intrinsics, adt_const_params ) ]
1313#![ no_core]
1414#![ allow( non_camel_case_types) ]
1515
@@ -35,7 +35,7 @@ pub struct f64x4([f64; 4]);
3535pub struct m64x4 ( [ i64 ; 4 ] ) ;
3636
3737#[ rustc_intrinsic]
38- unsafe fn simd_masked_load < M , P , T > ( mask : M , pointer : P , values : T ) -> T ;
38+ unsafe fn simd_masked_load < M , P , T , const ALIGN : SimdAlign > ( mask : M , pointer : P , values : T ) -> T ;
3939
4040// CHECK-LABEL: load_i8x16
4141#[ no_mangle]
@@ -56,7 +56,11 @@ pub unsafe extern "C" fn load_i8x16(mask: m8x16, pointer: *const i8) -> i8x16 {
5656 // x86-avx512-NOT: vpsllw
5757 // x86-avx512: vpmovb2m k1, xmm0
5858 // x86-avx512-NEXT: vmovdqu8 xmm0 {k1} {z}, xmmword ptr [rdi]
59- simd_masked_load ( mask, pointer, i8x16 ( [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ) )
59+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Element } > (
60+ mask,
61+ pointer,
62+ i8x16 ( [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ) ,
63+ )
6064}
6165
6266// CHECK-LABEL: load_f32x8
@@ -68,7 +72,29 @@ pub unsafe extern "C" fn load_f32x8(mask: m32x8, pointer: *const f32) -> f32x8 {
6872 // x86-avx512-NOT: vpslld
6973 // x86-avx512: vpmovd2m k1, ymm0
7074 // x86-avx512-NEXT: vmovups ymm0 {k1} {z}, ymmword ptr [rdi]
71- simd_masked_load ( mask, pointer, f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) )
75+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Element } > (
76+ mask,
77+ pointer,
78+ f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) ,
79+ )
80+ }
81+
82+ // CHECK-LABEL: load_f32x8_aligned
83+ #[ no_mangle]
84+ pub unsafe extern "C" fn load_f32x8_aligned ( mask : m32x8 , pointer : * const f32 ) -> f32x8 {
85+ // x86-avx2-NOT: vpslld
86+ // x86-avx2: vmaskmovps ymm0, ymm0, ymmword ptr [rdi]
87+ //
88+ // x86-avx512-NOT: vpslld
89+ // x86-avx512: vpmovd2m k1, ymm0
90+ // x86-avx512-NEXT: vmovaps ymm0 {k1} {z}, ymmword ptr [rdi]
91+ //
92+ // with `SimdAlign::Vector`, AVX-512 should emit the aligned `vmovaps` instead of `vmovups`
93+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Vector } > (
94+ mask,
95+ pointer,
96+ f32x8 ( [ 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 , 0_f32 ] ) ,
97+ )
7298}
7399
74100// CHECK-LABEL: load_f64x4
@@ -79,5 +105,9 @@ pub unsafe extern "C" fn load_f64x4(mask: m64x4, pointer: *const f64) -> f64x4 {
79105 //
80106 // x86-avx512-NOT: vpsllq
81107 // x86-avx512: vpmovq2m k1, ymm0
82- simd_masked_load ( mask, pointer, f64x4 ( [ 0_f64 , 0_f64 , 0_f64 , 0_f64 ] ) )
108+ simd_masked_load :: < _ , _ , _ , { SimdAlign :: Element } > (
109+ mask,
110+ pointer,
111+ f64x4 ( [ 0_f64 , 0_f64 , 0_f64 , 0_f64 ] ) ,
112+ )
83113}
0 commit comments