@@ -682,6 +682,21 @@ define i32 @signbits_cmpss(float %0, float %1) {
682682 ret i32 %4
683683}
684684
; Intrinsic-based counterpart of the signbits_cmpss test: the operands are
; <4 x float> vectors and the compare is performed with @llvm.x86.sse.cmp.ss.
; Element 0 of the compare result is read back as an i32 and arithmetically
; shifted right by 31 before being returned.
; NOTE(review): the recorded expectations still contain the sarl after the
; extract; presumably this documents the current baseline for sign-bit
; analysis of the intrinsic -- confirm against the commit message.
685+ define i32 @signbits_cmpss_int (<4 x float > %0 , <4 x float > %1 ) {
686+ ; CHECK-LABEL: signbits_cmpss_int:
687+ ; CHECK: # %bb.0:
688+ ; CHECK-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0
689+ ; CHECK-NEXT: vextractps $0, %xmm0, %eax
690+ ; CHECK-NEXT: sarl $31, %eax
691+ ; CHECK-NEXT: ret{{[l|q]}}
; Scalar compare through the intrinsic; with immediate 0 the expected output
; above is a vcmpeqss.
692+ %3 = tail call <4 x float > @llvm.x86.sse.cmp.ss (<4 x float > %0 , <4 x float > %1 , i8 0 )
; Reinterpret the compare result as four i32 lanes and take lane 0.
693+ %4 = bitcast <4 x float > %3 to <4 x i32 >
694+ %5 = extractelement <4 x i32 > %4 , i32 0
; Arithmetic shift right by 31, i.e. the i32 bit width minus one.
695+ %6 = ashr i32 %5 , 31
696+ ret i32 %6
697+ }
; Declaration of the intrinsic called above; its i8 operand is marked immarg.
698+ declare <4 x float > @llvm.x86.sse.cmp.ss (<4 x float >, <4 x float >, i8 immarg)
699+
685700define i64 @signbits_cmpsd (double %0 , double %1 ) {
686701; X86-LABEL: signbits_cmpsd:
687702; X86: # %bb.0:
@@ -705,6 +720,29 @@ define i64 @signbits_cmpsd(double %0, double %1) {
705720 ret i64 %4
706721}
707722
; Intrinsic-based counterpart of the signbits_cmpsd test: the operands are
; <2 x double> vectors and the compare is performed with @llvm.x86.sse2.cmp.sd.
; Element 0 of the compare result is read back as an i64 and arithmetically
; shifted right by 63 before being returned.
; The two sets of expectations differ: under the X86 prefix the expected code
; extracts 32-bit element 1 into eax, shifts it by 31 and copies eax to edx;
; under the X64 prefix it moves xmm0 to rax with vmovq and shifts by 63.
; NOTE(review): both expectation sets still contain the shift after the
; extract; presumably this documents the current baseline for sign-bit
; analysis of the intrinsic -- confirm against the commit message.
723+ define i64 @signbits_cmpsd_int (<2 x double > %0 , <2 x double > %1 ) {
724+ ; X86-LABEL: signbits_cmpsd_int:
725+ ; X86: # %bb.0:
726+ ; X86-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
727+ ; X86-NEXT: vextractps $1, %xmm0, %eax
728+ ; X86-NEXT: sarl $31, %eax
729+ ; X86-NEXT: movl %eax, %edx
730+ ; X86-NEXT: retl
731+ ;
732+ ; X64-LABEL: signbits_cmpsd_int:
733+ ; X64: # %bb.0:
734+ ; X64-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
735+ ; X64-NEXT: vmovq %xmm0, %rax
736+ ; X64-NEXT: sarq $63, %rax
737+ ; X64-NEXT: retq
; Scalar compare through the intrinsic; with immediate 0 the expected output
; above is a vcmpeqsd.
738+ %3 = tail call <2 x double > @llvm.x86.sse2.cmp.sd (<2 x double > %0 , <2 x double > %1 , i8 0 )
; Reinterpret the compare result as two i64 lanes and take lane 0.
739+ %4 = bitcast <2 x double > %3 to <2 x i64 >
740+ %5 = extractelement <2 x i64 > %4 , i32 0
; Arithmetic shift right by 63, i.e. the i64 bit width minus one.
741+ %6 = ashr i64 %5 , 63
742+ ret i64 %6
743+ }
; Declaration of the intrinsic called above; its i8 operand is marked immarg.
744+ declare <2 x double > @llvm.x86.sse2.cmp.sd (<2 x double >, <2 x double >, i8 immarg)
745+
708746; Make sure we can preserve sign bit information into the second basic block
709747; so we can avoid having to shift bit 0 into bit 7 for each element due to
710748; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires
0 commit comments