@@ -2565,35 +2565,65 @@ pub unsafe fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
25652565/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256)
25662566#[ inline]
25672567#[ target_feature( enable = "avx2" ) ]
2568- #[ cfg_attr( test, assert_instr( vpslldq, imm8 = 3 ) ) ]
2569- #[ rustc_args_required_const ( 1 ) ]
2568+ #[ cfg_attr( test, assert_instr( vpslldq, IMM8 = 3 ) ) ]
2569+ #[ rustc_legacy_const_generics ( 1 ) ]
25702570#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2571- pub unsafe fn _mm256_slli_si256 ( a : __m256i , imm8 : i32 ) -> __m256i {
2572- let a = a. as_i64x4 ( ) ;
2573- macro_rules! call {
2574- ( $imm8: expr) => {
2575- vpslldq( a, $imm8)
2576- } ;
2577- }
2578- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2571+ pub unsafe fn _mm256_slli_si256 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2572+ static_assert_imm8 ! ( IMM8 ) ;
2573+ _mm256_bslli_epi128 :: < IMM8 > ( a)
25792574}
25802575
25812576/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
25822577///
25832578/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128)
25842579#[ inline]
25852580#[ target_feature( enable = "avx2" ) ]
2586- #[ cfg_attr( test, assert_instr( vpslldq, imm8 = 3 ) ) ]
2587- #[ rustc_args_required_const ( 1 ) ]
2581+ #[ cfg_attr( test, assert_instr( vpslldq, IMM8 = 3 ) ) ]
2582+ #[ rustc_legacy_const_generics ( 1 ) ]
25882583#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2589- pub unsafe fn _mm256_bslli_epi128 ( a : __m256i , imm8 : i32 ) -> __m256i {
2590- let a = a. as_i64x4 ( ) ;
2591- macro_rules! call {
2592- ( $imm8: expr) => {
2593- vpslldq( a, $imm8)
2594- } ;
2595- }
2596- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2584+ pub unsafe fn _mm256_bslli_epi128 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2585+ static_assert_imm8 ! ( IMM8 ) ;
2586+ let a = a. as_i8x32 ( ) ;
2587+ let zero = _mm256_setzero_si256 ( ) . as_i8x32 ( ) ;
2588+ let r: i8x32 = simd_shuffle32 (
2589+ zero,
2590+ a,
2591+ [
2592+ 32 - ( IMM8 as u32 & 0xff ) ,
2593+ 33 - ( IMM8 as u32 & 0xff ) ,
2594+ 34 - ( IMM8 as u32 & 0xff ) ,
2595+ 35 - ( IMM8 as u32 & 0xff ) ,
2596+ 36 - ( IMM8 as u32 & 0xff ) ,
2597+ 37 - ( IMM8 as u32 & 0xff ) ,
2598+ 38 - ( IMM8 as u32 & 0xff ) ,
2599+ 39 - ( IMM8 as u32 & 0xff ) ,
2600+ 40 - ( IMM8 as u32 & 0xff ) ,
2601+ 41 - ( IMM8 as u32 & 0xff ) ,
2602+ 42 - ( IMM8 as u32 & 0xff ) ,
2603+ 43 - ( IMM8 as u32 & 0xff ) ,
2604+ 44 - ( IMM8 as u32 & 0xff ) ,
2605+ 45 - ( IMM8 as u32 & 0xff ) ,
2606+ 46 - ( IMM8 as u32 & 0xff ) ,
2607+ 47 - ( IMM8 as u32 & 0xff ) ,
2608+ 48 - ( IMM8 as u32 & 0xff ) - 16 ,
2609+ 49 - ( IMM8 as u32 & 0xff ) - 16 ,
2610+ 50 - ( IMM8 as u32 & 0xff ) - 16 ,
2611+ 51 - ( IMM8 as u32 & 0xff ) - 16 ,
2612+ 52 - ( IMM8 as u32 & 0xff ) - 16 ,
2613+ 53 - ( IMM8 as u32 & 0xff ) - 16 ,
2614+ 54 - ( IMM8 as u32 & 0xff ) - 16 ,
2615+ 55 - ( IMM8 as u32 & 0xff ) - 16 ,
2616+ 56 - ( IMM8 as u32 & 0xff ) - 16 ,
2617+ 57 - ( IMM8 as u32 & 0xff ) - 16 ,
2618+ 58 - ( IMM8 as u32 & 0xff ) - 16 ,
2619+ 59 - ( IMM8 as u32 & 0xff ) - 16 ,
2620+ 60 - ( IMM8 as u32 & 0xff ) - 16 ,
2621+ 61 - ( IMM8 as u32 & 0xff ) - 16 ,
2622+ 62 - ( IMM8 as u32 & 0xff ) - 16 ,
2623+ 63 - ( IMM8 as u32 & 0xff ) - 16 ,
2624+ ] ,
2625+ ) ;
2626+ transmute ( r)
25972627}
25982628
25992629/// Shifts packed 32-bit integers in `a` left by the amount
@@ -2729,35 +2759,158 @@ pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
27292759/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256)
27302760#[ inline]
27312761#[ target_feature( enable = "avx2" ) ]
2732- #[ cfg_attr( test, assert_instr( vpsrldq, imm8 = 3 ) ) ]
2733- #[ rustc_args_required_const ( 1 ) ]
2762+ #[ cfg_attr( test, assert_instr( vpsrldq, IMM8 = 1 ) ) ]
2763+ #[ rustc_legacy_const_generics ( 1 ) ]
27342764#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2735- pub unsafe fn _mm256_srli_si256 ( a : __m256i , imm8 : i32 ) -> __m256i {
2736- let a = a. as_i64x4 ( ) ;
2737- macro_rules! call {
2738- ( $imm8: expr) => {
2739- vpsrldq( a, $imm8)
2740- } ;
2741- }
2742- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2765+ pub unsafe fn _mm256_srli_si256 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2766+ static_assert_imm8 ! ( IMM8 ) ;
2767+ _mm256_bsrli_epi128 :: < IMM8 > ( a)
27432768}
27442769
27452770/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
27462771///
27472772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128)
27482773#[ inline]
27492774#[ target_feature( enable = "avx2" ) ]
2750- #[ cfg_attr( test, assert_instr( vpsrldq, imm8 = 3 ) ) ]
2751- #[ rustc_args_required_const ( 1 ) ]
2775+ #[ cfg_attr( test, assert_instr( vpsrldq, IMM8 = 1 ) ) ]
2776+ #[ rustc_legacy_const_generics ( 1 ) ]
27522777#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2753- pub unsafe fn _mm256_bsrli_epi128 ( a : __m256i , imm8 : i32 ) -> __m256i {
2754- let a = a. as_i64x4 ( ) ;
2755- macro_rules! call {
2756- ( $imm8: expr) => {
2757- vpsrldq( a, $imm8)
2758- } ;
2759- }
2760- transmute ( constify_imm8 ! ( imm8 * 8 , call) )
2778+ pub unsafe fn _mm256_bsrli_epi128 < const IMM8 : i32 > ( a : __m256i ) -> __m256i {
2779+ static_assert_imm8 ! ( IMM8 ) ;
2780+ let a = a. as_i8x32 ( ) ;
2781+ let zero = _mm256_setzero_si256 ( ) . as_i8x32 ( ) ;
2782+ let r: i8x32 = match IMM8 % 16 {
2783+ 0 => simd_shuffle32 (
2784+ a,
2785+ zero,
2786+ [
2787+ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 , 19 , 20 , 21 , 22 ,
2788+ 23 , 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 ,
2789+ ] ,
2790+ ) ,
2791+ 1 => simd_shuffle32 (
2792+ a,
2793+ zero,
2794+ [
2795+ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 17 , 18 , 19 , 20 , 21 , 22 , 23 ,
2796+ 24 , 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 ,
2797+ ] ,
2798+ ) ,
2799+ 2 => simd_shuffle32 (
2800+ a,
2801+ zero,
2802+ [
2803+ 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 18 , 19 , 20 , 21 , 22 , 23 , 24 ,
2804+ 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 , 32 ,
2805+ ] ,
2806+ ) ,
2807+ 3 => simd_shuffle32 (
2808+ a,
2809+ zero,
2810+ [
2811+ 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 19 , 20 , 21 , 22 , 23 , 24 ,
2812+ 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 , 32 , 32 ,
2813+ ] ,
2814+ ) ,
2815+ 4 => simd_shuffle32 (
2816+ a,
2817+ zero,
2818+ [
2819+ 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 20 , 21 , 22 , 23 , 24 , 25 ,
2820+ 26 , 27 , 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 ,
2821+ ] ,
2822+ ) ,
2823+ 5 => simd_shuffle32 (
2824+ a,
2825+ zero,
2826+ [
2827+ 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 21 , 22 , 23 , 24 , 25 , 26 ,
2828+ 27 , 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 ,
2829+ ] ,
2830+ ) ,
2831+ 6 => simd_shuffle32 (
2832+ a,
2833+ zero,
2834+ [
2835+ 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 22 , 23 , 24 , 25 , 26 , 27 ,
2836+ 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 ,
2837+ ] ,
2838+ ) ,
2839+ 7 => simd_shuffle32 (
2840+ a,
2841+ zero,
2842+ [
2843+ 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 23 , 24 , 25 , 26 , 27 ,
2844+ 28 , 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2845+ ] ,
2846+ ) ,
2847+ 8 => simd_shuffle32 (
2848+ a,
2849+ zero,
2850+ [
2851+ 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 24 , 25 , 26 , 27 , 28 ,
2852+ 29 , 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2853+ ] ,
2854+ ) ,
2855+ 9 => simd_shuffle32 (
2856+ a,
2857+ zero,
2858+ [
2859+ 9 , 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 25 , 26 , 27 , 28 , 29 ,
2860+ 30 , 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2861+ ] ,
2862+ ) ,
2863+ 10 => simd_shuffle32 (
2864+ a,
2865+ zero,
2866+ [
2867+ 10 , 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 26 , 27 , 28 , 29 , 30 ,
2868+ 31 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2869+ ] ,
2870+ ) ,
2871+ 11 => simd_shuffle32 (
2872+ a,
2873+ zero,
2874+ [
2875+ 11 , 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 27 , 28 , 29 , 30 , 31 ,
2876+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2877+ ] ,
2878+ ) ,
2879+ 12 => simd_shuffle32 (
2880+ a,
2881+ zero,
2882+ [
2883+ 12 , 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 28 , 29 , 30 , 31 , 32 ,
2884+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2885+ ] ,
2886+ ) ,
2887+ 13 => simd_shuffle32 (
2888+ a,
2889+ zero,
2890+ [
2891+ 13 , 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 29 , 30 , 31 , 32 , 32 ,
2892+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2893+ ] ,
2894+ ) ,
2895+ 14 => simd_shuffle32 (
2896+ a,
2897+ zero,
2898+ [
2899+ 14 , 15 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 30 , 31 , 32 , 32 , 32 ,
2900+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2901+ ] ,
2902+ ) ,
2903+ 15 => simd_shuffle32 (
2904+ a,
2905+ zero,
2906+ [
2907+ 14 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 31 , 32 , 32 , 32 , 32 ,
2908+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
2909+ ] ,
2910+ ) ,
2911+ _ => zero,
2912+ } ;
2913+ transmute ( r)
27612914}
27622915
27632916/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
@@ -4824,7 +4977,7 @@ mod tests {
48244977 #[ simd_test( enable = "avx2" ) ]
48254978 unsafe fn test_mm256_slli_si256 ( ) {
48264979 let a = _mm256_set1_epi64x ( 0xFFFFFFFF ) ;
4827- let r = _mm256_slli_si256 ( a , 3 ) ;
4980+ let r = _mm256_slli_si256 :: < 3 > ( a ) ;
48284981 assert_eq_m256i ( r, _mm256_set1_epi64x ( 0xFFFFFFFF000000 ) ) ;
48294982 }
48304983
@@ -4923,7 +5076,7 @@ mod tests {
49235076 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 ,
49245077 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 ,
49255078 ) ;
4926- let r = _mm256_srli_si256 ( a , 3 ) ;
5079+ let r = _mm256_srli_si256 :: < 3 > ( a ) ;
49275080 #[ rustfmt:: skip]
49285081 let e = _mm256_setr_epi8 (
49295082 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 ,
0 commit comments