@@ -19,8 +19,11 @@ define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
1919
2020define i32 @test_upper_x86_sse_movmsk_ps (<4 x float > %a0 ) {
2121; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
22- ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> [[A0:%.*]])
23- ; CHECK-NEXT: ret i32 [[TMP1]]
22+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32>
23+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
24+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
25+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
26+ ; CHECK-NEXT: ret i32 [[TMP4]]
2427;
2528 %1 = call i32 @llvm.x86.sse.movmsk.ps (<4 x float > %a0 )
2629 %2 = and i32 %1 , 15
@@ -29,8 +32,11 @@ define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
2932
3033define i32 @test_upper_x86_sse2_movmsk_pd (<2 x double > %a0 ) {
3134; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
32- ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]])
33- ; CHECK-NEXT: ret i32 [[TMP1]]
35+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64>
36+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], zeroinitializer
37+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i1> [[TMP2]] to i2
38+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i2 [[TMP3]] to i32
39+ ; CHECK-NEXT: ret i32 [[TMP4]]
3440;
3541 %1 = call i32 @llvm.x86.sse2.movmsk.pd (<2 x double > %a0 )
3642 %2 = and i32 %1 , 3
@@ -39,8 +45,10 @@ define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
3945
4046define i32 @test_upper_x86_sse2_pmovmskb_128 (<16 x i8 > %a0 ) {
4147; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
42- ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]])
43- ; CHECK-NEXT: ret i32 [[TMP1]]
48+ ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <16 x i8> [[A0:%.*]], zeroinitializer
49+ ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i1> [[TMP1]] to i16
50+ ; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
51+ ; CHECK-NEXT: ret i32 [[TMP3]]
4452;
4553 %1 = call i32 @llvm.x86.sse2.pmovmskb.128 (<16 x i8 > %a0 )
4654 %2 = and i32 %1 , 65535
@@ -49,8 +57,11 @@ define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
4957
5058define i32 @test_upper_x86_avx_movmsk_ps_256 (<8 x float > %a0 ) {
5159; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
52- ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> [[A0:%.*]])
53- ; CHECK-NEXT: ret i32 [[TMP1]]
60+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32>
61+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP1]], zeroinitializer
62+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i1> [[TMP2]] to i8
63+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
64+ ; CHECK-NEXT: ret i32 [[TMP4]]
5465;
5566 %1 = call i32 @llvm.x86.avx.movmsk.ps.256 (<8 x float > %a0 )
5667 %2 = and i32 %1 , 255
@@ -59,8 +70,11 @@ define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
5970
6071define i32 @test_upper_x86_avx_movmsk_pd_256 (<4 x double > %a0 ) {
6172; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
62- ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> [[A0:%.*]])
63- ; CHECK-NEXT: ret i32 [[TMP1]]
73+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
74+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i64> [[TMP1]], zeroinitializer
75+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
76+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
77+ ; CHECK-NEXT: ret i32 [[TMP4]]
6478;
6579 %1 = call i32 @llvm.x86.avx.movmsk.pd.256 (<4 x double > %a0 )
6680 %2 = and i32 %1 , 15
@@ -382,45 +396,50 @@ define i32 @sext_avx2_pmovmskb(<32 x i1> %x) {
382396 ret i32 %r
383397}
384398
385- ; Negative test - bitcast from scalar.
399+ ; Bitcast from sign-extended scalar.
386400
; Input: a scalar i1 sign-extended to i128, reinterpreted as <4 x float>, then
; passed to llvm.x86.sse.movmsk.ps. The updated ('+') CHECK lines expect the
; intrinsic call to be replaced by a bitcast to <4 x i32>, an 'icmp slt' against
; zero, a bitcast of the <4 x i1> result to i4, and a zext to i32.
387401define i32 @sext_sse_movmsk_ps_scalar_source (i1 %x ) {
388402; CHECK-LABEL: @sext_sse_movmsk_ps_scalar_source(
389403; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[X:%.*]] to i128
390- ; CHECK-NEXT: [[BC:%.*]] = bitcast i128 [[SEXT]] to <4 x float>
391- ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> [[BC]])
392- ; CHECK-NEXT: ret i32 [[R]]
404+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[SEXT]] to <4 x i32>
405+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
406+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
407+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
408+ ; CHECK-NEXT: ret i32 [[TMP4]]
393409;
394410 %sext = sext i1 %x to i128 ; sign-extend the single bit %x to a 128-bit scalar
395411 %bc = bitcast i128 %sext to <4 x float > ; reinterpret the scalar as the intrinsic's <4 x float> operand
396412 %r = call i32 @llvm.x86.sse.movmsk.ps (<4 x float > %bc ) ; the call under test
397413 ret i32 %r
398414}
399415
400- ; Negative test - bitcast from vector type with more elements.
416+ ; Bitcast from vector type with more elements.
401417
; Input: <8 x i1> sign-extended to <8 x i16>, reinterpreted as <4 x float>
; (fewer, wider lanes than the source vector), then passed to
; llvm.x86.sse.movmsk.ps. The updated ('+') CHECK lines expect the intrinsic
; call to be replaced by a bitcast to <4 x i32>, an 'icmp slt' against zero,
; a bitcast of the <4 x i1> result to i4, and a zext to i32.
402418define i32 @sext_sse_movmsk_ps_too_many_elts (<8 x i1 > %x ) {
403419; CHECK-LABEL: @sext_sse_movmsk_ps_too_many_elts(
404420; CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[X:%.*]] to <8 x i16>
405- ; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i16> [[SEXT]] to <4 x float>
406- ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> [[BC]])
407- ; CHECK-NEXT: ret i32 [[R]]
421+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[SEXT]] to <4 x i32>
422+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
423+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
424+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
425+ ; CHECK-NEXT: ret i32 [[TMP4]]
408426;
409427 %sext = sext <8 x i1 > %x to <8 x i16 > ; sign-extend each of the eight i1 lanes to i16
410428 %bc = bitcast <8 x i16 > %sext to <4 x float > ; reinterpret eight 16-bit lanes as four 32-bit lanes
411429 %r = call i32 @llvm.x86.sse.movmsk.ps (<4 x float > %bc ) ; the call under test
412430 ret i32 %r
413431}
414432
415- ; TODO: We could handle this by doing a bitcasted sign-bit test after the sext?
416- ; But need to make sure the backend handles that correctly.
433+ ; Source vector has fewer, wider elements (<2 x i64>) than the movmsk operand
; (<4 x float>); this is handled by doing a bitcasted sign-bit test after the sext.
417434
418435define i32 @sext_sse_movmsk_ps_must_replicate_bits (<2 x i1 > %x ) {
419436; CHECK-LABEL: @sext_sse_movmsk_ps_must_replicate_bits(
420437; CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i64>
421- ; CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x float>
422- ; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> [[BC]])
423- ; CHECK-NEXT: ret i32 [[R]]
438+ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SEXT]] to <4 x i32>
439+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], zeroinitializer
440+ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
441+ ; CHECK-NEXT: [[TMP4:%.*]] = zext i4 [[TMP3]] to i32
442+ ; CHECK-NEXT: ret i32 [[TMP4]]
424443;
425444 %sext = sext <2 x i1 > %x to <2 x i64 >
426445 %bc = bitcast <2 x i64 > %sext to <4 x float >
0 commit comments