Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 7effc71

Browse files
committed
[x86] fix predicate for avoiding vblendv
It only makes sense to produce the logic ops when one of the constants is +0.0. Otherwise, go with vblendv to reduce code size.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@347403 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 966966c commit 7effc71

File tree

2 files changed

+6
-13
lines changed

2 files changed

+6
-13
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19632,21 +19632,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
1963219632
// of 3 logic instructions for size savings and potentially speed.
1963319633
// Unfortunately, there is no scalar form of VBLENDV.
1963419634

19635-
// If either operand is a constant, don't try this. We can expect to
19635+
// If either operand is a +0.0 constant, don't try this. We can expect to
1963619636
// optimize away at least one of the logic instructions later in that
1963719637
// case, so that sequence would be faster than a variable blend.
1963819638

1963919639
// BLENDV was introduced with SSE 4.1, but the 2 register form implicitly
1964019640
// uses XMM0 as the selection register. That may need just as many
1964119641
// instructions as the AND/ANDN/OR sequence due to register moves, so
1964219642
// don't bother.
19643-
19644-
if (Subtarget.hasAVX() &&
19645-
!isa<ConstantFPSDNode>(Op1) && !isa<ConstantFPSDNode>(Op2)) {
19646-
19643+
if (Subtarget.hasAVX() && !isNullFPConstant(Op1) &&
19644+
!isNullFPConstant(Op2)) {
1964719645
// Convert to vectors, do a VSELECT, and convert back to scalar.
1964819646
// All of the conversions should be optimized away.
19649-
1965019647
MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64;
1965119648
SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1);
1965219649
SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2);

test/CodeGen/X86/vselect-zero.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,8 @@ define double @fsel_nonzero_false_val(double %x, double %y, double %z) {
8888
; AVX-LABEL: fsel_nonzero_false_val:
8989
; AVX: # %bb.0:
9090
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
91-
; AVX-NEXT: vandpd %xmm2, %xmm0, %xmm1
92-
; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
93-
; AVX-NEXT: vandnpd %xmm2, %xmm0, %xmm0
94-
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
91+
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
92+
; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0
9593
; AVX-NEXT: retq
9694
%cond = fcmp oeq double %x, %y
9795
%r = select i1 %cond, double %z, double 42.0
@@ -112,9 +110,7 @@ define double @fsel_nonzero_true_val(double %x, double %y, double %z) {
112110
; AVX: # %bb.0:
113111
; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
114112
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
115-
; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm1
116-
; AVX-NEXT: vandnpd %xmm2, %xmm0, %xmm0
117-
; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
113+
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
118114
; AVX-NEXT: retq
119115
%cond = fcmp oeq double %x, %y
120116
%r = select i1 %cond, double 42.0, double %z

0 commit comments

Comments
 (0)