@@ -276,8 +276,8 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
276276 ret <2 x double > %6
277277}
278278
279- define float @signbits_ashr_sextinreg_bitops_extract_sitofp (<2 x i64 > %a0 , <2 x i64 > %a1 , i32 %a2 ) nounwind {
280- ; X32-LABEL: signbits_ashr_sextinreg_bitops_extract_sitofp :
279+ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp (<2 x i64 > %a0 , <2 x i64 > %a1 , i32 %a2 ) nounwind {
280+ ; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp :
281281; X32: # BB#0:
282282; X32-NEXT: pushl %ebp
283283; X32-NEXT: movl %esp, %ebp
@@ -300,9 +300,7 @@ define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x
300300; X32-NEXT: vpsrad $20, %xmm1, %xmm2
301301; X32-NEXT: vpsrlq $20, %xmm1, %xmm1
302302; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
303- ; X32-NEXT: vpand %xmm1, %xmm0, %xmm2
304- ; X32-NEXT: vpor %xmm1, %xmm2, %xmm1
305- ; X32-NEXT: vpxor %xmm0, %xmm1, %xmm0
303+ ; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
306304; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
307305; X32-NEXT: fildll {{[0-9]+}}(%esp)
308306; X32-NEXT: fstps {{[0-9]+}}(%esp)
@@ -311,7 +309,7 @@ define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x
311309; X32-NEXT: popl %ebp
312310; X32-NEXT: retl
313311;
314- ; X64-LABEL: signbits_ashr_sextinreg_bitops_extract_sitofp :
312+ ; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp :
315313; X64: # BB#0:
316314; X64-NEXT: vpsrlq $60, %xmm0, %xmm2
317315; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
@@ -325,21 +323,71 @@ define float @signbits_ashr_sextinreg_bitops_extract_sitofp(<2 x i64> %a0, <2 x
325323; X64-NEXT: vpsrad $20, %xmm1, %xmm2
326324; X64-NEXT: vpsrlq $20, %xmm1, %xmm1
327325; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
326+ ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
327+ ; X64-NEXT: vmovq %xmm0, %rax
328+ ; X64-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
329+ ; X64-NEXT: retq
330+ %1 = ashr <2 x i64 > %a0 , <i64 61 , i64 60 >
331+ %2 = sext i32 %a2 to i64
332+ %3 = insertelement <2 x i64 > %a1 , i64 %2 , i32 0
333+ %4 = shl <2 x i64 > %3 , <i64 20 , i64 20 >
334+ %5 = ashr <2 x i64 > %4 , <i64 20 , i64 20 >
335+ %6 = and <2 x i64 > %1 , %5
336+ %7 = extractelement <2 x i64 > %6 , i32 0
337+ %8 = sitofp i64 %7 to float
338+ ret float %8
339+ }
340+
; signbits_ashr_sextvecinreg_bitops_extract_sitofp
; IR under test (listed at the end of the function): %a0 is arithmetic-shifted
; right by <61, 60>; the two low i32 lanes of %a1 are sign-extended to i64; the
; two vectors are combined as ((%1 & %3) | %3) ^ %1; lane 0 is then extracted
; and converted to float with sitofp.
; In the expected assembly the vpsrlq/vpblendw/vpxor/vpsubq sequence appears to
; correspond to the ashr (the X64 mask [4,8] equals 2^63 shifted right by 61
; and by 60), and vpmovsxdq to the shufflevector+sext pair.
; X32 and X64 are the check prefixes; the RUN lines that define them are not
; visible in this view.
341+ define float @signbits_ashr_sextvecinreg_bitops_extract_sitofp (<2 x i64 > %a0 , <4 x i32 > %a1 ) nounwind {
342+ ; X32-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
343+ ; X32: # BB#0:
344+ ; X32-NEXT: pushl %ebp
345+ ; X32-NEXT: movl %esp, %ebp
346+ ; X32-NEXT: andl $-8, %esp
347+ ; X32-NEXT: subl $16, %esp
348+ ; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
349+ ; X32-NEXT: vpsrlq $60, %xmm2, %xmm3
350+ ; X32-NEXT: vpsrlq $61, %xmm2, %xmm2
351+ ; X32-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
352+ ; X32-NEXT: vpsrlq $60, %xmm0, %xmm3
353+ ; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
354+ ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
355+ ; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
356+ ; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
357+ ; X32-NEXT: vpmovsxdq %xmm1, %xmm1
358+ ; X32-NEXT: vpand %xmm1, %xmm0, %xmm2
359+ ; X32-NEXT: vpor %xmm1, %xmm2, %xmm1
360+ ; X32-NEXT: vpxor %xmm0, %xmm1, %xmm0
361+ ; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
362+ ; X32-NEXT: fildll {{[0-9]+}}(%esp)
363+ ; X32-NEXT: fstps {{[0-9]+}}(%esp)
364+ ; X32-NEXT: flds {{[0-9]+}}(%esp)
365+ ; X32-NEXT: movl %ebp, %esp
366+ ; X32-NEXT: popl %ebp
367+ ; X32-NEXT: retl
368+ ;
369+ ; X64-LABEL: signbits_ashr_sextvecinreg_bitops_extract_sitofp:
370+ ; X64: # BB#0:
371+ ; X64-NEXT: vpsrlq $60, %xmm0, %xmm2
372+ ; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
373+ ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
374+ ; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8]
375+ ; X64-NEXT: vpxor %xmm2, %xmm0, %xmm0
376+ ; X64-NEXT: vpsubq %xmm2, %xmm0, %xmm0
377+ ; X64-NEXT: vpmovsxdq %xmm1, %xmm1
328378; X64-NEXT: vpand %xmm1, %xmm0, %xmm2
329379; X64-NEXT: vpor %xmm1, %xmm2, %xmm1
330380; X64-NEXT: vpxor %xmm0, %xmm1, %xmm0
331381; X64-NEXT: vmovq %xmm0, %rax
332382; X64-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0
333383; X64-NEXT: retq
; NOTE(review): the '-' lines below (old lines 334-344) look like the IR body
; removed by this diff from the earlier sextinreg test; they are not part of
; this function's current body, which is the '+' block after them.
334- %1 = ashr <2 x i64 > %a0 , <i64 61 , i64 60 >
335- %2 = sext i32 %a2 to i64
336- %3 = insertelement <2 x i64 > %a1 , i64 %2 , i32 0
337- %4 = shl <2 x i64 > %3 , <i64 20 , i64 20 >
338- %5 = ashr <2 x i64 > %4 , <i64 20 , i64 20 >
339- %6 = and <2 x i64 > %1 , %5
340- %7 = or <2 x i64 > %6 , %5
341- %8 = xor <2 x i64 > %7 , %1
342- %9 = extractelement <2 x i64 > %8 , i32 0
343- %10 = sitofp i64 %9 to float
344- ret float %10
; Current IR body: %1 = shifted %a0, %3 = sign-extended low half of %a1,
; %4..%6 = and/or/xor chain, %7 = lane 0, %8 = float result.
384+ %1 = ashr <2 x i64 > %a0 , <i64 61 , i64 60 >
385+ %2 = shufflevector <4 x i32 > %a1 , <4 x i32 > undef , <2 x i32 > <i32 0 , i32 1 >
386+ %3 = sext <2 x i32 > %2 to <2 x i64 >
387+ %4 = and <2 x i64 > %1 , %3
388+ %5 = or <2 x i64 > %4 , %3
389+ %6 = xor <2 x i64 > %5 , %1
390+ %7 = extractelement <2 x i64 > %6 , i32 0
391+ %8 = sitofp i64 %7 to float
392+ ret float %8
345393}
0 commit comments