@@ -1087,57 +1087,25 @@ pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
10871087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps)
10881088#[ inline]
10891089#[ target_feature( enable = "avx" ) ]
1090- #[ cfg_attr( test, assert_instr( vpermilps, imm8 = 9 ) ) ]
1091- #[ rustc_args_required_const ( 1 ) ]
1090+ #[ cfg_attr( test, assert_instr( vpermilps, IMM8 = 9 ) ) ]
    // NOTE(review): `rustc_legacy_const_generics(1)` replaces
    // `rustc_args_required_const(1)`; presumably it keeps the old call syntax
    // (immediate passed as argument index 1) working - confirm against rustc docs.
1091+ #[ rustc_legacy_const_generics ( 1 ) ]
10921092#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
    // Removed version: `imm8` is an ordinary `i32` argument. The nested
    // `shuffle1!`..`shuffle4!` macros match on each 2-bit field of `imm8`
    // (bits 0-1, 2-3, 4-5, 6-7) so that `simd_shuffle8` is always invoked with
    // literal indices `[a, b, c, d, a + 4, b + 4, c + 4, d + 4]`.
1093- pub unsafe fn _mm256_permute_ps ( a : __m256 , imm8 : i32 ) -> __m256 {
1094- let imm8 = ( imm8 & 0xFF ) as u8 ;
1095- let undefined = _mm256_undefined_ps ( ) ;
1096- macro_rules! shuffle4 {
1097- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
1098- simd_shuffle8(
1099- a,
1100- undefined,
1101- [ $a, $b, $c, $d, $a + 4 , $b + 4 , $c + 4 , $d + 4 ] ,
1102- )
1103- } ;
1104- }
1105- macro_rules! shuffle3 {
1106- ( $a: expr, $b: expr, $c: expr) => {
1107- match ( imm8 >> 6 ) & 0b11 {
1108- 0b00 => shuffle4!( $a, $b, $c, 0 ) ,
1109- 0b01 => shuffle4!( $a, $b, $c, 1 ) ,
1110- 0b10 => shuffle4!( $a, $b, $c, 2 ) ,
1111- _ => shuffle4!( $a, $b, $c, 3 ) ,
1112- }
1113- } ;
1114- }
1115- macro_rules! shuffle2 {
1116- ( $a: expr, $b: expr) => {
1117- match ( imm8 >> 4 ) & 0b11 {
1118- 0b00 => shuffle3!( $a, $b, 0 ) ,
1119- 0b01 => shuffle3!( $a, $b, 1 ) ,
1120- 0b10 => shuffle3!( $a, $b, 2 ) ,
1121- _ => shuffle3!( $a, $b, 3 ) ,
1122- }
1123- } ;
1124- }
1125- macro_rules! shuffle1 {
1126- ( $a: expr) => {
1127- match ( imm8 >> 2 ) & 0b11 {
1128- 0b00 => shuffle2!( $a, 0 ) ,
1129- 0b01 => shuffle2!( $a, 1 ) ,
1130- 0b10 => shuffle2!( $a, 2 ) ,
1131- _ => shuffle2!( $a, 3 ) ,
1132- }
1133- } ;
1134- }
1135- match imm8 & 0b11 {
1136- 0b00 => shuffle1 ! ( 0 ) ,
1137- 0b01 => shuffle1 ! ( 1 ) ,
1138- 0b10 => shuffle1 ! ( 2 ) ,
1139- _ => shuffle1 ! ( 3 ) ,
1140- }
    // Added version: the immediate is the const generic `IMM8`, so the eight
    // shuffle indices are written as plain arithmetic on it instead of being
    // enumerated through nested matches.
1093+ pub unsafe fn _mm256_permute_ps < const IMM8 : i32 > ( a : __m256 ) -> __m256 {
    // NOTE(review): `static_assert_imm8!` is defined elsewhere; presumably a
    // compile-time check that IMM8 fits in 8 bits - confirm.
1094+ static_assert_imm8 ! ( IMM8 ) ;
1095+ simd_shuffle8 (
1096+ a,
    // Every index below is at most 7 (3 + 4); presumably that means the second
    // operand is never selected from - confirm against `simd_shuffle8` semantics.
1097+ _mm256_undefined_ps ( ) ,
1098+ [
    // Output lanes 0-3: one 2-bit selector each, taken from bits 0-1, 2-3,
    // 4-5 and 6-7 of IMM8.
1099+ ( IMM8 as u32 >> 0 ) & 0b11 ,
1100+ ( IMM8 as u32 >> 2 ) & 0b11 ,
1101+ ( IMM8 as u32 >> 4 ) & 0b11 ,
1102+ ( IMM8 as u32 >> 6 ) & 0b11 ,
    // Output lanes 4-7: the same four selectors offset by 4, i.e. the same
    // pattern applied to indices 4-7.
1103+ ( ( IMM8 as u32 >> 0 ) & 0b11 ) + 4 ,
1104+ ( ( IMM8 as u32 >> 2 ) & 0b11 ) + 4 ,
1105+ ( ( IMM8 as u32 >> 4 ) & 0b11 ) + 4 ,
1106+ ( ( IMM8 as u32 >> 6 ) & 0b11 ) + 4 ,
1107+ ] ,
1108+ )
11411109}
11421110
11431111/// Shuffles single-precision (32-bit) floating-point elements in `a`
@@ -1146,53 +1114,21 @@ pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 {
11461114/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps)
11471115#[ inline]
11481116#[ target_feature( enable = "avx,sse" ) ]
1149- #[ cfg_attr( test, assert_instr( vpermilps, imm8 = 9 ) ) ]
1150- #[ rustc_args_required_const ( 1 ) ]
1117+ #[ cfg_attr( test, assert_instr( vpermilps, IMM8 = 9 ) ) ]
    // NOTE(review): `rustc_legacy_const_generics(1)` replaces
    // `rustc_args_required_const(1)`; presumably it keeps the old call syntax
    // (immediate passed as argument index 1) working - confirm against rustc docs.
1118+ #[ rustc_legacy_const_generics ( 1 ) ]
11511119#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
    // Removed version: `imm8` is an ordinary `i32` argument. The nested
    // `shuffle1!`..`shuffle4!` macros match on each 2-bit field of `imm8`
    // (bits 0-1, 2-3, 4-5, 6-7) so that `simd_shuffle4` is always invoked with
    // four literal indices in 0..=3.
1152- pub unsafe fn _mm_permute_ps ( a : __m128 , imm8 : i32 ) -> __m128 {
1153- let imm8 = ( imm8 & 0xFF ) as u8 ;
1154- let undefined = _mm_undefined_ps ( ) ;
1155- macro_rules! shuffle4 {
1156- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
1157- simd_shuffle4( a, undefined, [ $a, $b, $c, $d] )
1158- } ;
1159- }
1160- macro_rules! shuffle3 {
1161- ( $a: expr, $b: expr, $c: expr) => {
1162- match ( imm8 >> 6 ) & 0b11 {
1163- 0b00 => shuffle4!( $a, $b, $c, 0 ) ,
1164- 0b01 => shuffle4!( $a, $b, $c, 1 ) ,
1165- 0b10 => shuffle4!( $a, $b, $c, 2 ) ,
1166- _ => shuffle4!( $a, $b, $c, 3 ) ,
1167- }
1168- } ;
1169- }
1170- macro_rules! shuffle2 {
1171- ( $a: expr, $b: expr) => {
1172- match ( imm8 >> 4 ) & 0b11 {
1173- 0b00 => shuffle3!( $a, $b, 0 ) ,
1174- 0b01 => shuffle3!( $a, $b, 1 ) ,
1175- 0b10 => shuffle3!( $a, $b, 2 ) ,
1176- _ => shuffle3!( $a, $b, 3 ) ,
1177- }
1178- } ;
1179- }
1180- macro_rules! shuffle1 {
1181- ( $a: expr) => {
1182- match ( imm8 >> 2 ) & 0b11 {
1183- 0b00 => shuffle2!( $a, 0 ) ,
1184- 0b01 => shuffle2!( $a, 1 ) ,
1185- 0b10 => shuffle2!( $a, 2 ) ,
1186- _ => shuffle2!( $a, 3 ) ,
1187- }
1188- } ;
1189- }
1190- match imm8 & 0b11 {
1191- 0b00 => shuffle1 ! ( 0 ) ,
1192- 0b01 => shuffle1 ! ( 1 ) ,
1193- 0b10 => shuffle1 ! ( 2 ) ,
1194- _ => shuffle1 ! ( 3 ) ,
1195- }
    // Added version: the immediate is the const generic `IMM8`, so the four
    // shuffle indices are computed directly from it.
1120+ pub unsafe fn _mm_permute_ps < const IMM8 : i32 > ( a : __m128 ) -> __m128 {
    // NOTE(review): `static_assert_imm8!` is defined elsewhere; presumably a
    // compile-time check that IMM8 fits in 8 bits - confirm.
1121+ static_assert_imm8 ! ( IMM8 ) ;
1122+ simd_shuffle4 (
1123+ a,
    // Every index below is at most 3; presumably that means the second operand
    // is never selected from - confirm against `simd_shuffle4` semantics.
1124+ _mm_undefined_ps ( ) ,
1125+ [
    // One 2-bit selector per output lane, taken from bits 0-1, 2-3, 4-5 and
    // 6-7 of IMM8 for lanes 0, 1, 2 and 3 respectively.
1126+ ( IMM8 as u32 >> 0 ) & 0b11 ,
1127+ ( IMM8 as u32 >> 2 ) & 0b11 ,
1128+ ( IMM8 as u32 >> 4 ) & 0b11 ,
1129+ ( IMM8 as u32 >> 6 ) & 0b11 ,
1130+ ] ,
1131+ )
11961132}
11971133
11981134/// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1225,45 +1161,21 @@ pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
12251161/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd)
12261162#[ inline]
12271163#[ target_feature( enable = "avx" ) ]
1228- #[ cfg_attr( test, assert_instr( vpermilpd, imm8 = 0x1 ) ) ]
1229- #[ rustc_args_required_const ( 1 ) ]
1164+ #[ cfg_attr( test, assert_instr( vpermilpd, IMM4 = 0x1 ) ) ]
    // NOTE(review): `rustc_legacy_const_generics(1)` replaces
    // `rustc_args_required_const(1)`; presumably it keeps the old call syntax
    // (immediate passed as argument index 1) working - confirm against rustc docs.
1165+ #[ rustc_legacy_const_generics ( 1 ) ]
12301166#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
    // Removed version: `imm8` is an ordinary `i32` argument. The nested macros
    // match on bits 0, 1, 2 and 3 of `imm8` one at a time: bits 0 and 1 choose
    // index 0 or 1 for output lanes 0 and 1, bits 2 and 3 choose index 2 or 3
    // for output lanes 2 and 3. Bits 4-7 survive the `& 0xFF` mask but are
    // never inspected.
1231- pub unsafe fn _mm256_permute_pd ( a : __m256d , imm8 : i32 ) -> __m256d {
1232- let imm8 = ( imm8 & 0xFF ) as u8 ;
1233- let undefined = _mm256_undefined_pd ( ) ;
1234- macro_rules! shuffle4 {
1235- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
1236- simd_shuffle4( a, undefined, [ $a, $b, $c, $d] )
1237- } ;
1238- }
1239- macro_rules! shuffle3 {
1240- ( $a: expr, $b: expr, $c: expr) => {
1241- match ( imm8 >> 3 ) & 0x1 {
1242- 0 => shuffle4!( $a, $b, $c, 2 ) ,
1243- _ => shuffle4!( $a, $b, $c, 3 ) ,
1244- }
1245- } ;
1246- }
1247- macro_rules! shuffle2 {
1248- ( $a: expr, $b: expr) => {
1249- match ( imm8 >> 2 ) & 0x1 {
1250- 0 => shuffle3!( $a, $b, 2 ) ,
1251- _ => shuffle3!( $a, $b, 3 ) ,
1252- }
1253- } ;
1254- }
1255- macro_rules! shuffle1 {
1256- ( $a: expr) => {
1257- match ( imm8 >> 1 ) & 0x1 {
1258- 0 => shuffle2!( $a, 0 ) ,
1259- _ => shuffle2!( $a, 1 ) ,
1260- }
1261- } ;
1262- }
1263- match imm8 & 0x1 {
1264- 0 => shuffle1 ! ( 0 ) ,
1265- _ => shuffle1 ! ( 1 ) ,
1266- }
    // Added version: the immediate is the const generic `IMM4`, and the four
    // shuffle indices are computed directly from its low four bits.
1167+ pub unsafe fn _mm256_permute_pd < const IMM4 : i32 > ( a : __m256d ) -> __m256d {
    // NOTE(review): `static_assert_imm4!` is defined elsewhere; presumably it
    // rejects immediates that do not fit in 4 bits at compile time. The removed
    // code accepted any value and ignored the bits above bit 3 - confirm that
    // narrowing the accepted range is intended.
1168+ static_assert_imm4 ! ( IMM4 ) ;
1169+ simd_shuffle4 (
1170+ a,
    // Every index below is at most 3; presumably that means the second operand
    // is never selected from - confirm against `simd_shuffle4` semantics.
1171+ _mm256_undefined_pd ( ) ,
1172+ [
    // Output lanes 0 and 1: bit 0 / bit 1 of IMM4 picks index 0 or 1.
1173+ ( ( IMM4 as u32 >> 0 ) & 1 ) ,
1174+ ( ( IMM4 as u32 >> 1 ) & 1 ) ,
    // Output lanes 2 and 3: bit 2 / bit 3 of IMM4 picks index 2 or 3.
1175+ ( ( IMM4 as u32 >> 2 ) & 1 ) + 2 ,
1176+ ( ( IMM4 as u32 >> 3 ) & 1 ) + 2 ,
1177+ ] ,
1178+ )
12671179}
12681180
12691181/// Shuffles double-precision (64-bit) floating-point elements in `a`
@@ -1272,29 +1184,16 @@ pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d {
12721184/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd)
12731185#[ inline]
12741186#[ target_feature( enable = "avx,sse2" ) ]
1275- #[ cfg_attr( test, assert_instr( vpermilpd, imm8 = 0x1 ) ) ]
1276- #[ rustc_args_required_const ( 1 ) ]
1187+ #[ cfg_attr( test, assert_instr( vpermilpd, IMM2 = 0x1 ) ) ]
    // NOTE(review): `rustc_legacy_const_generics(1)` replaces
    // `rustc_args_required_const(1)`; presumably it keeps the old call syntax
    // (immediate passed as argument index 1) working - confirm against rustc docs.
1188+ #[ rustc_legacy_const_generics ( 1 ) ]
12771189#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
    // Removed version: `imm8` is an ordinary `i32` argument. Bit 0 chooses
    // index 0 or 1 for output lane 0 and bit 1 chooses index 0 or 1 for output
    // lane 1; bits 2-7 survive the `& 0xFF` mask but are never inspected.
1278- pub unsafe fn _mm_permute_pd ( a : __m128d , imm8 : i32 ) -> __m128d {
1279- let imm8 = ( imm8 & 0xFF ) as u8 ;
1280- let undefined = _mm_undefined_pd ( ) ;
1281- macro_rules! shuffle2 {
1282- ( $a: expr, $b: expr) => {
1283- simd_shuffle2( a, undefined, [ $a, $b] )
1284- } ;
1285- }
1286- macro_rules! shuffle1 {
1287- ( $a: expr) => {
1288- match ( imm8 >> 1 ) & 0x1 {
1289- 0 => shuffle2!( $a, 0 ) ,
1290- _ => shuffle2!( $a, 1 ) ,
1291- }
1292- } ;
1293- }
1294- match imm8 & 0x1 {
1295- 0 => shuffle1 ! ( 0 ) ,
1296- _ => shuffle1 ! ( 1 ) ,
1297- }
    // Added version: the immediate is the const generic `IMM2`, and both
    // shuffle indices are computed directly from its low two bits.
1190+ pub unsafe fn _mm_permute_pd < const IMM2 : i32 > ( a : __m128d ) -> __m128d {
    // NOTE(review): `static_assert_imm2!` is defined elsewhere; presumably it
    // rejects immediates that do not fit in 2 bits at compile time. The removed
    // code accepted any value and ignored the bits above bit 1 - confirm that
    // narrowing the accepted range is intended.
1191+ static_assert_imm2 ! ( IMM2 ) ;
1192+ simd_shuffle2 (
1193+ a,
    // Both indices below are 0 or 1; presumably that means the second operand
    // is never selected from - confirm against `simd_shuffle2` semantics.
1194+ _mm_undefined_pd ( ) ,
    // Output lane 0 uses bit 0 of IMM2, output lane 1 uses bit 1.
1195+ [ ( IMM2 as u32 ) & 1 , ( IMM2 as u32 >> 1 ) & 1 ] ,
1196+ )
12981197}
12991198
13001199/// Shuffles 256 bits (composed of 8 packed single-precision (32-bit)
@@ -3784,15 +3683,15 @@ mod tests {
37843683 #[ simd_test( enable = "avx" ) ]
37853684 unsafe fn test_mm256_permute_ps ( ) {
37863685 let a = _mm256_setr_ps ( 4. , 3. , 2. , 5. , 8. , 9. , 64. , 50. ) ;
    // 0x1b == 0b00_01_10_11: the 2-bit selectors for output lanes 0..3 are
    // 3, 2, 1, 0, so each group of four elements comes out reversed.
    // The call site moves the immediate from an argument to a const generic.
3787- let r = _mm256_permute_ps ( a , 0x1b ) ;
3686+ let r = _mm256_permute_ps :: < 0x1b > ( a ) ;
37883687 let e = _mm256_setr_ps ( 5. , 2. , 3. , 4. , 50. , 64. , 9. , 8. ) ;
37893688 assert_eq_m256 ( r, e) ;
37903689 }
37913690
37923691 #[ simd_test( enable = "avx" ) ]
37933692 unsafe fn test_mm_permute_ps ( ) {
37943693 let a = _mm_setr_ps ( 4. , 3. , 2. , 5. ) ;
    // 0x1b == 0b00_01_10_11: the 2-bit selectors for output lanes 0..3 are
    // 3, 2, 1, 0, so the four elements come out reversed.
    // The call site moves the immediate from an argument to a const generic.
3795- let r = _mm_permute_ps ( a , 0x1b ) ;
3694+ let r = _mm_permute_ps :: < 0x1b > ( a ) ;
37963695 let e = _mm_setr_ps ( 5. , 2. , 3. , 4. ) ;
37973696 assert_eq_m128 ( r, e) ;
37983697 }
@@ -3818,15 +3717,15 @@ mod tests {
38183717 #[ simd_test( enable = "avx" ) ]
38193718 unsafe fn test_mm256_permute_pd ( ) {
38203719 let a = _mm256_setr_pd ( 4. , 3. , 2. , 5. ) ;
    // 5 == 0b0101: bits 0 and 2 are set, bits 1 and 3 are clear, giving
    // indices [1, 0, 3, 2], so the two elements of each pair are swapped.
    // The call site moves the immediate from an argument to a const generic.
3821- let r = _mm256_permute_pd ( a , 5 ) ;
3720+ let r = _mm256_permute_pd :: < 5 > ( a ) ;
38223721 let e = _mm256_setr_pd ( 3. , 4. , 5. , 2. ) ;
38233722 assert_eq_m256d ( r, e) ;
38243723 }
38253724
38263725 #[ simd_test( enable = "avx" ) ]
38273726 unsafe fn test_mm_permute_pd ( ) {
38283727 let a = _mm_setr_pd ( 4. , 3. ) ;
    // 1 == 0b01: bit 0 is set and bit 1 is clear, giving indices [1, 0],
    // so the two elements are swapped.
    // The call site moves the immediate from an argument to a const generic.
3829- let r = _mm_permute_pd ( a , 1 ) ;
3728+ let r = _mm_permute_pd :: < 1 > ( a ) ;
38303729 let e = _mm_setr_pd ( 3. , 4. ) ;
38313730 assert_eq_m128d ( r, e) ;
38323731 }
0 commit comments