Skip to content

Commit e784146

Browse files
committed
[X86] Check Subtarget.hasSSE3() before calling shouldUseHorizontalOp and emitting X86ISD::FHADD in LowerUINT_TO_FP_i64.
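This was a regression from r375341: X86ISD::FHADD is selected as (V)HADDPD, which requires SSE3, so it must not be emitted on SSE2-only targets. Fixes PR43729.
llvm-svn: 375381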
1 parent e884843 commit e784146

File tree

2 files changed

+105
-1
lines changed

2 files changed

+105
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18591,7 +18591,7 @@ static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
1859118591
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
1859218592
SDValue Result;
1859318593

18594-
if (shouldUseHorizontalOp(true, DAG, Subtarget)) {
18594+
if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
1859518595
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
1859618596
} else {
1859718597
SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});

llvm/test/CodeGen/X86/scalar-int-to-fp.ll

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,110 @@ define double @u64_to_d(i64 %a) nounwind {
670670
ret double %r
671671
}
672672

673+
define double @u64_to_d_optsize(i64 %a) nounwind optsize {
674+
; AVX512DQVL_32-LABEL: u64_to_d_optsize:
675+
; AVX512DQVL_32: # %bb.0:
676+
; AVX512DQVL_32-NEXT: pushl %ebp
677+
; AVX512DQVL_32-NEXT: movl %esp, %ebp
678+
; AVX512DQVL_32-NEXT: andl $-8, %esp
679+
; AVX512DQVL_32-NEXT: subl $8, %esp
680+
; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
681+
; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0
682+
; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp)
683+
; AVX512DQVL_32-NEXT: fldl (%esp)
684+
; AVX512DQVL_32-NEXT: movl %ebp, %esp
685+
; AVX512DQVL_32-NEXT: popl %ebp
686+
; AVX512DQVL_32-NEXT: vzeroupper
687+
; AVX512DQVL_32-NEXT: retl
688+
;
689+
; AVX512_64-LABEL: u64_to_d_optsize:
690+
; AVX512_64: # %bb.0:
691+
; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
692+
; AVX512_64-NEXT: retq
693+
;
694+
; AVX512DQ_32-LABEL: u64_to_d_optsize:
695+
; AVX512DQ_32: # %bb.0:
696+
; AVX512DQ_32-NEXT: pushl %ebp
697+
; AVX512DQ_32-NEXT: movl %esp, %ebp
698+
; AVX512DQ_32-NEXT: andl $-8, %esp
699+
; AVX512DQ_32-NEXT: subl $8, %esp
700+
; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
701+
; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0
702+
; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp)
703+
; AVX512DQ_32-NEXT: fldl (%esp)
704+
; AVX512DQ_32-NEXT: movl %ebp, %esp
705+
; AVX512DQ_32-NEXT: popl %ebp
706+
; AVX512DQ_32-NEXT: vzeroupper
707+
; AVX512DQ_32-NEXT: retl
708+
;
709+
; AVX512F_32-LABEL: u64_to_d_optsize:
710+
; AVX512F_32: # %bb.0:
711+
; AVX512F_32-NEXT: pushl %ebp
712+
; AVX512F_32-NEXT: movl %esp, %ebp
713+
; AVX512F_32-NEXT: andl $-8, %esp
714+
; AVX512F_32-NEXT: subl $8, %esp
715+
; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
716+
; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
717+
; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0
718+
; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
719+
; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp)
720+
; AVX512F_32-NEXT: fldl (%esp)
721+
; AVX512F_32-NEXT: movl %ebp, %esp
722+
; AVX512F_32-NEXT: popl %ebp
723+
; AVX512F_32-NEXT: retl
724+
;
725+
; SSE2_32-LABEL: u64_to_d_optsize:
726+
; SSE2_32: # %bb.0:
727+
; SSE2_32-NEXT: pushl %ebp
728+
; SSE2_32-NEXT: movl %esp, %ebp
729+
; SSE2_32-NEXT: andl $-8, %esp
730+
; SSE2_32-NEXT: subl $8, %esp
731+
; SSE2_32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
732+
; SSE2_32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
733+
; SSE2_32-NEXT: subpd {{\.LCPI.*}}, %xmm0
734+
; SSE2_32-NEXT: movapd %xmm0, %xmm1
735+
; SSE2_32-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
736+
; SSE2_32-NEXT: addsd %xmm0, %xmm1
737+
; SSE2_32-NEXT: movsd %xmm1, (%esp)
738+
; SSE2_32-NEXT: fldl (%esp)
739+
; SSE2_32-NEXT: movl %ebp, %esp
740+
; SSE2_32-NEXT: popl %ebp
741+
; SSE2_32-NEXT: retl
742+
;
743+
; SSE2_64-LABEL: u64_to_d_optsize:
744+
; SSE2_64: # %bb.0:
745+
; SSE2_64-NEXT: movq %rdi, %xmm1
746+
; SSE2_64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
747+
; SSE2_64-NEXT: subpd {{.*}}(%rip), %xmm1
748+
; SSE2_64-NEXT: movapd %xmm1, %xmm0
749+
; SSE2_64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
750+
; SSE2_64-NEXT: addsd %xmm1, %xmm0
751+
; SSE2_64-NEXT: retq
752+
;
753+
; X87-LABEL: u64_to_d_optsize:
754+
; X87: # %bb.0:
755+
; X87-NEXT: pushl %ebp
756+
; X87-NEXT: movl %esp, %ebp
757+
; X87-NEXT: andl $-8, %esp
758+
; X87-NEXT: subl $16, %esp
759+
; X87-NEXT: movl 8(%ebp), %eax
760+
; X87-NEXT: movl 12(%ebp), %ecx
761+
; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp)
762+
; X87-NEXT: movl %eax, (%esp)
763+
; X87-NEXT: xorl %eax, %eax
764+
; X87-NEXT: testl %ecx, %ecx
765+
; X87-NEXT: setns %al
766+
; X87-NEXT: fildll (%esp)
767+
; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4)
768+
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
769+
; X87-NEXT: fldl {{[0-9]+}}(%esp)
770+
; X87-NEXT: movl %ebp, %esp
771+
; X87-NEXT: popl %ebp
772+
; X87-NEXT: retl
773+
%r = uitofp i64 %a to double
774+
ret double %r
775+
}
776+
673777
define double @s64_to_d(i64 %a) nounwind {
674778
; AVX512DQVL_32-LABEL: s64_to_d:
675779
; AVX512DQVL_32: # %bb.0:

0 commit comments

Comments
 (0)