@@ -512,13 +512,10 @@ define <4 x float> @add_v4f32_u123(<4 x float> %a, <4 x float> %b) {
512512
513513define <4 x float > @add_v4f32_0u23 (<4 x float > %a , <4 x float > %b ) {
514514; CHECK-LABEL: @add_v4f32_0u23(
515- ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
516- ; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
517- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 poison>
518- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 poison, i32 poison>
515+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 5, i32 6>
516+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 poison, i32 4, i32 7>
519517; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
520- ; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 poison, i32 4, i32 5>
521- ; CHECK-NEXT: ret <4 x float> [[RESULT1]]
518+ ; CHECK-NEXT: ret <4 x float> [[TMP4]]
522519;
523520 %a0 = extractelement <4 x float > %a , i32 0
524521 %a1 = extractelement <4 x float > %a , i32 1
@@ -542,13 +539,10 @@ define <4 x float> @add_v4f32_0u23(<4 x float> %a, <4 x float> %b) {
542539
543540define <4 x float > @add_v4f32_01u3 (<4 x float > %a , <4 x float > %b ) {
544541; SSE2-LABEL: @add_v4f32_01u3(
545- ; SSE2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
546- ; SSE2-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
547- ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B:%.*]], <4 x i32> <i32 2, i32 poison, i32 6, i32 poison>
548- ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 3, i32 poison, i32 7, i32 poison>
542+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6>
543+ ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7>
549544; SSE2-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
550- ; SSE2-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
551- ; SSE2-NEXT: ret <4 x float> [[RESULT1]]
545+ ; SSE2-NEXT: ret <4 x float> [[TMP4]]
552546;
553547; SSE4-LABEL: @add_v4f32_01u3(
554548; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 1, i32 2, i32 poison, i32 6>
@@ -563,13 +557,10 @@ define <4 x float> @add_v4f32_01u3(<4 x float> %a, <4 x float> %b) {
563557; AVX2-NEXT: ret <4 x float> [[TMP4]]
564558;
565559; AVX512-LABEL: @add_v4f32_01u3(
566- ; AVX512-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
567- ; AVX512-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
568- ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B:%.*]], <4 x i32> <i32 2, i32 poison, i32 6, i32 poison>
569- ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 3, i32 poison, i32 7, i32 poison>
560+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 poison, i32 6>
561+ ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 poison, i32 7>
570562; AVX512-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
571- ; AVX512-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 poison, i32 6>
572- ; AVX512-NEXT: ret <4 x float> [[RESULT1]]
563+ ; AVX512-NEXT: ret <4 x float> [[TMP4]]
573564;
574565 %a0 = extractelement <4 x float > %a , i32 0
575566 %a1 = extractelement <4 x float > %a , i32 1
@@ -593,13 +584,10 @@ define <4 x float> @add_v4f32_01u3(<4 x float> %a, <4 x float> %b) {
593584
594585define <4 x float > @add_v4f32_012u (<4 x float > %a , <4 x float > %b ) {
595586; SSE2-LABEL: @add_v4f32_012u(
596- ; SSE2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
597- ; SSE2-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
598- ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B:%.*]], <4 x i32> <i32 2, i32 4, i32 poison, i32 poison>
599- ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 3, i32 5, i32 poison, i32 poison>
587+ ; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison>
588+ ; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison>
600589; SSE2-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
601- ; SSE2-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
602- ; SSE2-NEXT: ret <4 x float> [[RESULT1]]
590+ ; SSE2-NEXT: ret <4 x float> [[TMP4]]
603591;
604592; SSE4-LABEL: @add_v4f32_012u(
605593; SSE4-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
@@ -620,13 +608,10 @@ define <4 x float> @add_v4f32_012u(<4 x float> %a, <4 x float> %b) {
620608; AVX2-NEXT: ret <4 x float> [[RESULT]]
621609;
622610; AVX512-LABEL: @add_v4f32_012u(
623- ; AVX512-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
624- ; AVX512-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
625- ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B:%.*]], <4 x i32> <i32 2, i32 4, i32 poison, i32 poison>
626- ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 3, i32 5, i32 poison, i32 poison>
611+ ; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 poison>
612+ ; AVX512-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 3, i32 5, i32 poison>
627613; AVX512-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
628- ; AVX512-NEXT: [[RESULT1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 poison>
629- ; AVX512-NEXT: ret <4 x float> [[RESULT1]]
614+ ; AVX512-NEXT: ret <4 x float> [[TMP4]]
630615;
631616 %a0 = extractelement <4 x float > %a , i32 0
632617 %a1 = extractelement <4 x float > %a , i32 1
0 commit comments