Skip to content

Commit 0da163a

Browse files
committed
Revert r373172 "[X86] Add custom isel logic to match VPTERNLOG from 2 logic ops."
This seems to be causing some performance regressions that I'm trying to investigate. One thing that stands out is that this transform can increase the live range of the operands of the earlier logic op. This can be bad for register allocation. If there are two logic op inputs we should really combine the one that is closest, but SelectionDAG doesn't have a good way to do that. Maybe we need to do this as a basic block transform in Machine IR. llvm-svn: 373401
1 parent 5269091 commit 0da163a

26 files changed

+564
-748
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,6 @@ namespace {
514514
bool tryShiftAmountMod(SDNode *N);
515515
bool combineIncDecVector(SDNode *Node);
516516
bool tryShrinkShlLogicImm(SDNode *N);
517-
bool tryVPTERNLOG(SDNode *N);
518517
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
519518
bool tryMatchBitSelect(SDNode *N);
520519

@@ -3833,82 +3832,6 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
38333832
return true;
38343833
}
38353834

3836-
// Try to match two logic ops to a VPTERNLOG.
3837-
// FIXME: Handle inverted inputs?
3838-
// FIXME: Handle more complex patterns that use an operand more than once?
3839-
// FIXME: Support X86ISD::ANDNP
3840-
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
3841-
MVT NVT = N->getSimpleValueType(0);
3842-
3843-
// Make sure we support VPTERNLOG.
3844-
if (!NVT.isVector() || !Subtarget->hasAVX512() ||
3845-
NVT.getVectorElementType() == MVT::i1)
3846-
return false;
3847-
3848-
// We need VLX for 128/256-bit.
3849-
if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
3850-
return false;
3851-
3852-
unsigned Opc1 = N->getOpcode();
3853-
SDValue N0 = N->getOperand(0);
3854-
SDValue N1 = N->getOperand(1);
3855-
3856-
auto isLogicOp = [](unsigned Opc) {
3857-
return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
3858-
};
3859-
3860-
SDValue A, B, C;
3861-
unsigned Opc2;
3862-
if (isLogicOp(N1.getOpcode()) && N1.hasOneUse()) {
3863-
Opc2 = N1.getOpcode();
3864-
A = N0;
3865-
B = N1.getOperand(0);
3866-
C = N1.getOperand(1);
3867-
} else if (isLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
3868-
Opc2 = N0.getOpcode();
3869-
A = N1;
3870-
B = N0.getOperand(0);
3871-
C = N0.getOperand(1);
3872-
} else
3873-
return false;
3874-
3875-
uint64_t Imm;
3876-
switch (Opc1) {
3877-
default: llvm_unreachable("Unexpected opcode!");
3878-
case ISD::AND:
3879-
switch (Opc2) {
3880-
default: llvm_unreachable("Unexpected opcode!");
3881-
case ISD::AND: Imm = 0x80; break;
3882-
case ISD::OR: Imm = 0xe0; break;
3883-
case ISD::XOR: Imm = 0x60; break;
3884-
}
3885-
break;
3886-
case ISD::OR:
3887-
switch (Opc2) {
3888-
default: llvm_unreachable("Unexpected opcode!");
3889-
case ISD::AND: Imm = 0xf8; break;
3890-
case ISD::OR: Imm = 0xfe; break;
3891-
case ISD::XOR: Imm = 0xf6; break;
3892-
}
3893-
break;
3894-
case ISD::XOR:
3895-
switch (Opc2) {
3896-
default: llvm_unreachable("Unexpected opcode!");
3897-
case ISD::AND: Imm = 0x78; break;
3898-
case ISD::OR: Imm = 0x1e; break;
3899-
case ISD::XOR: Imm = 0x96; break;
3900-
}
3901-
break;
3902-
}
3903-
3904-
SDLoc DL(N);
3905-
SDValue New = CurDAG->getNode(X86ISD::VPTERNLOG, DL, NVT, A, B, C,
3906-
CurDAG->getTargetConstant(Imm, DL, MVT::i8));
3907-
ReplaceNode(N, New.getNode());
3908-
SelectCode(New.getNode());
3909-
return true;
3910-
}
3911-
39123835
/// Convert vector increment or decrement to sub/add with an all-ones constant:
39133836
/// add X, <1, 1...> --> sub X, <-1, -1...>
39143837
/// sub X, <1, 1...> --> add X, <-1, -1...>
@@ -4580,10 +4503,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
45804503
case ISD::XOR:
45814504
if (tryShrinkShlLogicImm(Node))
45824505
return;
4506+
45834507
if (Opcode == ISD::OR && tryMatchBitSelect(Node))
45844508
return;
4585-
if (tryVPTERNLOG(Node))
4586-
return;
45874509

45884510
LLVM_FALLTHROUGH;
45894511
case ISD::ADD:

llvm/test/CodeGen/X86/avx512-cvt.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -331,8 +331,8 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
331331
define <8 x double> @ulto8f64(<8 x i64> %a) {
332332
; NODQ-LABEL: ulto8f64:
333333
; NODQ: # %bb.0:
334-
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
335-
; NODQ-NEXT: vpternlogq $248, {{.*}}(%rip){1to8}, %zmm0, %zmm1
334+
; NODQ-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm1
335+
; NODQ-NEXT: vporq {{.*}}(%rip){1to8}, %zmm1, %zmm1
336336
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
337337
; NODQ-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
338338
; NODQ-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
@@ -356,20 +356,21 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
356356
; NODQ-LABEL: ulto16f64:
357357
; NODQ: # %bb.0:
358358
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
359-
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
360-
; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4
361-
; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4
359+
; NODQ-NEXT: vpandq %zmm2, %zmm0, %zmm3
360+
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm4 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
361+
; NODQ-NEXT: vporq %zmm4, %zmm3, %zmm3
362362
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
363363
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
364364
; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
365365
; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
366366
; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
367-
; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
368-
; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3
367+
; NODQ-NEXT: vaddpd %zmm0, %zmm3, %zmm0
368+
; NODQ-NEXT: vpandq %zmm2, %zmm1, %zmm2
369+
; NODQ-NEXT: vporq %zmm4, %zmm2, %zmm2
369370
; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
370371
; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
371372
; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
372-
; NODQ-NEXT: vaddpd %zmm1, %zmm3, %zmm1
373+
; NODQ-NEXT: vaddpd %zmm1, %zmm2, %zmm1
373374
; NODQ-NEXT: retq
374375
;
375376
; VLDQ-LABEL: ulto16f64:

0 commit comments

Comments
 (0)