@@ -2642,74 +2642,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
26422642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
26432643#[ inline]
26442644#[ target_feature( enable = "avx2" ) ]
2645- #[ cfg_attr( test, assert_instr( vpermilps, imm8 = 9 ) ) ]
2646- #[ rustc_args_required_const ( 1 ) ]
2645+ #[ cfg_attr( test, assert_instr( vpermilps, MASK = 9 ) ) ]
2646+ #[ rustc_legacy_const_generics ( 1 ) ]
26472647#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2648- pub unsafe fn _mm256_shuffle_epi32 ( a : __m256i , imm8 : i32 ) -> __m256i {
2649- // simd_shuffleX requires that its selector parameter be made up of
2650- // constant values, but we can't enforce that here. In spirit, we need
2651- // to write a `match` on all possible values of a byte, and for each value,
2652- // hard-code the correct `simd_shuffleX` call using only constants. We
2653- // then hope for LLVM to do the rest.
2654- //
2655- // Of course, that's... awful. So we try to use macros to do it for us.
2656- let imm8 = ( imm8 & 0xFF ) as u8 ;
2657-
2658- let a = a. as_i32x8 ( ) ;
2659- macro_rules! shuffle_done {
2660- ( $x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
2661- simd_shuffle8(
2662- a,
2663- a,
2664- [
2665- $x01,
2666- $x23,
2667- $x45,
2668- $x67,
2669- 4 + $x01,
2670- 4 + $x23,
2671- 4 + $x45,
2672- 4 + $x67,
2673- ] ,
2674- )
2675- } ;
2676- }
2677- macro_rules! shuffle_x67 {
2678- ( $x01: expr, $x23: expr, $x45: expr) => {
2679- match ( imm8 >> 6 ) & 0b11 {
2680- 0b00 => shuffle_done!( $x01, $x23, $x45, 0 ) ,
2681- 0b01 => shuffle_done!( $x01, $x23, $x45, 1 ) ,
2682- 0b10 => shuffle_done!( $x01, $x23, $x45, 2 ) ,
2683- _ => shuffle_done!( $x01, $x23, $x45, 3 ) ,
2684- }
2685- } ;
2686- }
2687- macro_rules! shuffle_x45 {
2688- ( $x01: expr, $x23: expr) => {
2689- match ( imm8 >> 4 ) & 0b11 {
2690- 0b00 => shuffle_x67!( $x01, $x23, 0 ) ,
2691- 0b01 => shuffle_x67!( $x01, $x23, 1 ) ,
2692- 0b10 => shuffle_x67!( $x01, $x23, 2 ) ,
2693- _ => shuffle_x67!( $x01, $x23, 3 ) ,
2694- }
2695- } ;
2696- }
2697- macro_rules! shuffle_x23 {
2698- ( $x01: expr) => {
2699- match ( imm8 >> 2 ) & 0b11 {
2700- 0b00 => shuffle_x45!( $x01, 0 ) ,
2701- 0b01 => shuffle_x45!( $x01, 1 ) ,
2702- 0b10 => shuffle_x45!( $x01, 2 ) ,
2703- _ => shuffle_x45!( $x01, 3 ) ,
2704- }
2705- } ;
2706- }
2707- let r: i32x8 = match imm8 & 0b11 {
2708- 0b00 => shuffle_x23 ! ( 0 ) ,
2709- 0b01 => shuffle_x23 ! ( 1 ) ,
2710- 0b10 => shuffle_x23 ! ( 2 ) ,
2711- _ => shuffle_x23 ! ( 3 ) ,
2712- } ;
2648+ pub unsafe fn _mm256_shuffle_epi32 < const MASK : i32 > ( a : __m256i ) -> __m256i {
2649+ static_assert_imm8 ! ( MASK ) ;
2650+ let r: i32x8 = simd_shuffle8 (
2651+ a. as_i32x8 ( ) ,
2652+ a. as_i32x8 ( ) ,
2653+ [
2654+ MASK as u32 & 0b11 ,
2655+ ( MASK as u32 >> 2 ) & 0b11 ,
2656+ ( MASK as u32 >> 4 ) & 0b11 ,
2657+ ( MASK as u32 >> 6 ) & 0b11 ,
2658+ ( MASK as u32 & 0b11 ) + 4 ,
2659+ ( ( MASK as u32 >> 2 ) & 0b11 ) + 4 ,
2660+ ( ( MASK as u32 >> 4 ) & 0b11 ) + 4 ,
2661+ ( ( MASK as u32 >> 6 ) & 0b11 ) + 4 ,
2662+ ] ,
2663+ ) ;
27132664 transmute ( r)
27142665}
27152666
0 commit comments