@@ -458,44 +458,21 @@ pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
458458// Note: LLVM7 prefers single-precision blend instructions when
459459// possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194
460460// #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))]
461- #[ cfg_attr( test, assert_instr( vblendps, imm8 = 9 ) ) ]
462- #[ rustc_args_required_const( 2 ) ]
463- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
464- pub unsafe fn _mm256_blend_pd ( a : __m256d , b : __m256d , imm8 : i32 ) -> __m256d {
465- let imm8 = ( imm8 & 0xFF ) as u8 ;
466- macro_rules! blend4 {
467- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
468- simd_shuffle4( a, b, [ $a, $b, $c, $d] )
469- } ;
470- }
471- macro_rules! blend3 {
472- ( $a: expr, $b: expr, $c: expr) => {
473- match imm8 & 0x8 {
474- 0 => blend4!( $a, $b, $c, 3 ) ,
475- _ => blend4!( $a, $b, $c, 7 ) ,
476- }
477- } ;
478- }
479- macro_rules! blend2 {
480- ( $a: expr, $b: expr) => {
481- match imm8 & 0x4 {
482- 0 => blend3!( $a, $b, 2 ) ,
483- _ => blend3!( $a, $b, 6 ) ,
484- }
485- } ;
486- }
487- macro_rules! blend1 {
488- ( $a: expr) => {
489- match imm8 & 0x2 {
490- 0 => blend2!( $a, 1 ) ,
491- _ => blend2!( $a, 5 ) ,
492- }
493- } ;
494- }
495- match imm8 & 0x1 {
496- 0 => blend1 ! ( 0 ) ,
497- _ => blend1 ! ( 4 ) ,
498- }
461+ #[ cfg_attr( test, assert_instr( vblendps, IMM4 = 9 ) ) ]
// The test-only instruction check above expects `vblendps`, not `vblendpd`;
// the LLVM note that precedes this function's attributes gives the reason.
//
// Index 2 below is the position the removed signature gave its `imm8: i32`
// argument. NOTE(review): presumably this keeps the three-argument call form
// usable -- confirm against the attribute's documentation.
462+ #[ rustc_legacy_const_generics( 2 ) ]
463+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
// Builds a 4-lane result from `a` and `b` under control of the const generic
// `IMM4`: for lane i (0..=3), bit i of `IMM4` clear keeps lane i of `a`, and
// bit i set takes lane i of `b`. This replaces the removed nested-`match`
// macro cascade with one index array computed directly from the immediate.
464+ pub unsafe fn _mm256_blend_pd < const IMM4 : i32 > ( a : __m256d , b : __m256d ) -> __m256d {
// NOTE(review): `static_assert_imm4!` is defined outside this view; presumably
// a compile-time check that `IMM4` fits in 4 bits. The removed code masked the
// immediate with `& 0xFF` and then only inspected bits 0-3, so out-of-range
// values may be treated differently now -- confirm.
465+ static_assert_imm4 ! ( IMM4 ) ;
466+ simd_shuffle4 (
467+ a,
468+ b,
469+ [
// Entry i evaluates to `i` when bit i of `IMM4` is 0 and to `4 + i` when it
// is 1; the removed cascade produced the same pairs (0/4, 1/5, 2/6, 3/7).
// `IMM4` is cast to `u32` before shifting; the `>> 0` and `+ 0` on the first
// entry are no-ops that keep all four rows in the same shape.
470+ ( ( IMM4 as u32 >> 0 ) & 1 ) * 4 + 0 ,
471+ ( ( IMM4 as u32 >> 1 ) & 1 ) * 4 + 1 ,
472+ ( ( IMM4 as u32 >> 2 ) & 1 ) * 4 + 2 ,
473+ ( ( IMM4 as u32 >> 3 ) & 1 ) * 4 + 3 ,
474+ ] ,
475+ )
499476}
500477
501478/// Blends packed single-precision (32-bit) floating-point elements from
@@ -504,61 +481,25 @@ pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
504481/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps)
505482#[ inline]
506483#[ target_feature( enable = "avx" ) ]
507- #[ cfg_attr( test, assert_instr( vblendps, imm8 = 9 ) ) ]
508- #[ rustc_args_required_const( 2 ) ]
509- #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
510- pub unsafe fn _mm256_blend_ps ( a : __m256 , b : __m256 , imm8 : i32 ) -> __m256 {
511- let imm8 = ( imm8 & 0xFF ) as u8 ;
512- macro_rules! blend4 {
513- (
514- $a: expr,
515- $b: expr,
516- $c: expr,
517- $d: expr,
518- $e: expr,
519- $f: expr,
520- $g: expr,
521- $h: expr
522- ) => {
523- simd_shuffle8( a, b, [ $a, $b, $c, $d, $e, $f, $g, $h] )
524- } ;
525- }
526- macro_rules! blend3 {
527- ( $a: expr, $b: expr, $c: expr, $d: expr, $e: expr, $f: expr) => {
528- match ( imm8 >> 6 ) & 0b11 {
529- 0b00 => blend4!( $a, $b, $c, $d, $e, $f, 6 , 7 ) ,
530- 0b01 => blend4!( $a, $b, $c, $d, $e, $f, 14 , 7 ) ,
531- 0b10 => blend4!( $a, $b, $c, $d, $e, $f, 6 , 15 ) ,
532- _ => blend4!( $a, $b, $c, $d, $e, $f, 14 , 15 ) ,
533- }
534- } ;
535- }
536- macro_rules! blend2 {
537- ( $a: expr, $b: expr, $c: expr, $d: expr) => {
538- match ( imm8 >> 4 ) & 0b11 {
539- 0b00 => blend3!( $a, $b, $c, $d, 4 , 5 ) ,
540- 0b01 => blend3!( $a, $b, $c, $d, 12 , 5 ) ,
541- 0b10 => blend3!( $a, $b, $c, $d, 4 , 13 ) ,
542- _ => blend3!( $a, $b, $c, $d, 12 , 13 ) ,
543- }
544- } ;
545- }
546- macro_rules! blend1 {
547- ( $a: expr, $b: expr) => {
548- match ( imm8 >> 2 ) & 0b11 {
549- 0b00 => blend2!( $a, $b, 2 , 3 ) ,
550- 0b01 => blend2!( $a, $b, 10 , 3 ) ,
551- 0b10 => blend2!( $a, $b, 2 , 11 ) ,
552- _ => blend2!( $a, $b, 10 , 11 ) ,
553- }
554- } ;
555- }
556- match imm8 & 0b11 {
557- 0b00 => blend1 ! ( 0 , 1 ) ,
558- 0b01 => blend1 ! ( 8 , 1 ) ,
559- 0b10 => blend1 ! ( 0 , 9 ) ,
560- _ => blend1 ! ( 8 , 9 ) ,
561- }
484+ #[ cfg_attr( test, assert_instr( vblendps, IMM8 = 9 ) ) ]
// Index 2 below is the position the removed signature gave its `imm8: i32`
// argument. NOTE(review): presumably this keeps the three-argument call form
// usable -- confirm against the attribute's documentation.
485+ #[ rustc_legacy_const_generics( 2 ) ]
486+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
// Builds an 8-lane result from `a` and `b` under control of the const generic
// `IMM8`: for lane i (0..=7), bit i of `IMM8` clear keeps lane i of `a`, and
// bit i set takes lane i of `b`. This replaces the removed macro cascade,
// which decoded the immediate two bits at a time through nested `match`es.
487+ pub unsafe fn _mm256_blend_ps < const IMM8 : i32 > ( a : __m256 , b : __m256 ) -> __m256 {
// NOTE(review): `static_assert_imm8!` is defined outside this view; presumably
// a compile-time check that `IMM8` fits in 8 bits. The removed code instead
// masked the immediate with `& 0xFF` at run time -- confirm whether values
// outside that range are now rejected.
488+ static_assert_imm8 ! ( IMM8 ) ;
489+ simd_shuffle8 (
490+ a,
491+ b,
492+ [
// Entry i evaluates to `i` when bit i of `IMM8` is 0 and to `8 + i` when it
// is 1, matching the index pairs of the removed cascade (0/8, 1/9, ... 7/15).
// `IMM8` is cast to `u32` before shifting; the `>> 0` and `+ 0` on the first
// entry are no-ops that keep all eight rows in the same shape.
493+ ( ( IMM8 as u32 >> 0 ) & 1 ) * 8 + 0 ,
494+ ( ( IMM8 as u32 >> 1 ) & 1 ) * 8 + 1 ,
495+ ( ( IMM8 as u32 >> 2 ) & 1 ) * 8 + 2 ,
496+ ( ( IMM8 as u32 >> 3 ) & 1 ) * 8 + 3 ,
497+ ( ( IMM8 as u32 >> 4 ) & 1 ) * 8 + 4 ,
498+ ( ( IMM8 as u32 >> 5 ) & 1 ) * 8 + 5 ,
499+ ( ( IMM8 as u32 >> 6 ) & 1 ) * 8 + 6 ,
500+ ( ( IMM8 as u32 >> 7 ) & 1 ) * 8 + 7 ,
501+ ] ,
502+ )
562503}
563504
564505/// Blends packed double-precision (64-bit) floating-point elements from
@@ -3378,23 +3319,23 @@ mod tests {
33783319 unsafe fn test_mm256_blend_pd ( ) {
// Exercises `_mm256_blend_pd` with three masks. Each call site moves the mask
// from a third argument to a const generic (`::<MASK>`); the expected vectors
// are unchanged.
//
// NOTE(review): lane 0 is 4.0 in both `a` and `b`, so none of the assertions
// below can detect lane 0 being taken from the wrong source.
33793320 let a = _mm256_setr_pd ( 4. , 9. , 16. , 25. ) ;
33803321 let b = _mm256_setr_pd ( 4. , 3. , 2. , 5. ) ;
// Mask 0x0: no bits set, so the result equals `a`.
3381- let r = _mm256_blend_pd ( a, b, 0x0 ) ;
3322+ let r = _mm256_blend_pd :: < 0x0 > ( a, b) ;
33823323 assert_eq_m256d ( r, _mm256_setr_pd ( 4. , 9. , 16. , 25. ) ) ;
// Mask 0x3: bits 0 and 1 set, so lanes 0-1 come from `b` and lanes 2-3 from `a`.
3383- let r = _mm256_blend_pd ( a, b, 0x3 ) ;
3324+ let r = _mm256_blend_pd :: < 0x3 > ( a, b) ;
33843325 assert_eq_m256d ( r, _mm256_setr_pd ( 4. , 3. , 16. , 25. ) ) ;
// Mask 0xF: all four bits set, so the result equals `b`.
3385- let r = _mm256_blend_pd ( a, b, 0xF ) ;
3326+ let r = _mm256_blend_pd :: < 0xF > ( a, b) ;
33863327 assert_eq_m256d ( r, _mm256_setr_pd ( 4. , 3. , 2. , 5. ) ) ;
33873328 }
33883329
33893330 #[ simd_test( enable = "avx" ) ]
// Exercises `_mm256_blend_ps` with three masks. Each call site moves the mask
// from a third argument to a const generic (`::<MASK>`); the expected vectors
// are unchanged.
//
// NOTE(review): only mask bits 0-3 are ever set here, so the selection of
// lanes 4-7 from `b` is not covered by this test.
33903331 unsafe fn test_mm256_blend_ps ( ) {
33913332 let a = _mm256_setr_ps ( 1. , 4. , 5. , 8. , 9. , 12. , 13. , 16. ) ;
33923333 let b = _mm256_setr_ps ( 2. , 3. , 6. , 7. , 10. , 11. , 14. , 15. ) ;
// Mask 0x0: no bits set, so the result equals `a`.
3393- let r = _mm256_blend_ps ( a, b, 0x0 ) ;
3334+ let r = _mm256_blend_ps :: < 0x0 > ( a, b) ;
33943335 assert_eq_m256 ( r, _mm256_setr_ps ( 1. , 4. , 5. , 8. , 9. , 12. , 13. , 16. ) ) ;
// Mask 0x3: bits 0 and 1 set, so lanes 0-1 come from `b` and lanes 2-7 from `a`.
3395- let r = _mm256_blend_ps ( a, b, 0x3 ) ;
3336+ let r = _mm256_blend_ps :: < 0x3 > ( a, b) ;
33963337 assert_eq_m256 ( r, _mm256_setr_ps ( 2. , 3. , 5. , 8. , 9. , 12. , 13. , 16. ) ) ;
// Mask 0xF: bits 0-3 set, so lanes 0-3 come from `b` and lanes 4-7 from `a`.
3397- let r = _mm256_blend_ps ( a, b, 0xF ) ;
3338+ let r = _mm256_blend_ps :: < 0xF > ( a, b) ;
33983339 assert_eq_m256 ( r, _mm256_setr_ps ( 2. , 3. , 6. , 7. , 9. , 12. , 13. , 16. ) ) ;
33993340 }
34003341
0 commit comments