Skip to content

Commit 62bea74

Browse files
committed
[X86][BMI1] X86DAGToDAGISel: select BEXTR from x << (32 - y) >> (32 - y) pattern
Summary: Continuation of D52348. We also get the `c) x & (-1 >> (32 - y))` pattern here, because of D48768. I will add extra uses to those tests and follow up with a patch to handle those patterns too. Reviewers: RKSimon, craig.topper Reviewed By: craig.topper Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D53521 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@345014 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 8e47a8d commit 62bea74

File tree

4 files changed

+258
-414
lines changed

4 files changed

+258
-414
lines changed

lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2688,6 +2688,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
26882688
// c) x & (-1 >> (32 - y))
26892689
// d) x << (32 - y) >> (32 - y)
26902690
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2691+
assert(
2692+
(Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
2693+
"Should be either an and-mask, or right-shift after clearing high bits.");
2694+
26912695
// BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
26922696
if (!Subtarget->hasBMI() && !Subtarget->hasBMI2())
26932697
return false;
@@ -2698,13 +2702,16 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
26982702
if (NVT != MVT::i32 && NVT != MVT::i64)
26992703
return false;
27002704

2705+
unsigned Size = NVT.getSizeInBits();
2706+
27012707
SDValue NBits;
27022708

27032709
// If we have BMI2's BZHI, we are ok with multi-use patterns.
27042710
// Else, if we only have BMI1's BEXTR, we require one-use.
27052711
const bool CanHaveExtraUses = Subtarget->hasBMI2();
2706-
auto checkOneUse = [CanHaveExtraUses](SDValue Op) {
2707-
return CanHaveExtraUses || Op.hasOneUse();
2712+
auto checkOneUse = [CanHaveExtraUses](SDValue Op, unsigned NUses = 1) {
2713+
return CanHaveExtraUses ||
2714+
Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
27082715
};
27092716

27102717
// a) x & ((1 << nbits) + (-1))
@@ -2740,31 +2747,73 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
27402747
return true;
27412748
};
27422749

2750+
SDValue X;
2751+
2752+
// d) x << (32 - y) >> (32 - y)
2753+
auto matchPatternD = [&checkOneUse, Size, &X, &NBits](SDNode *Node) -> bool {
2754+
if (Node->getOpcode() != ISD::SRL)
2755+
return false;
2756+
SDValue N0 = Node->getOperand(0);
2757+
if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
2758+
return false;
2759+
SDValue N1 = Node->getOperand(1);
2760+
SDValue N01 = N0->getOperand(1);
2761+
// Both of the shifts must be by the exact same value.
2762+
// There should not be any uses of the shift amount outside of the pattern.
2763+
if (N1 != N01 || !checkOneUse(N1, 2))
2764+
return false;
2765+
// Skip over a truncate of the shift amount.
2766+
if (N1->getOpcode() == ISD::TRUNCATE) {
2767+
N1 = N1->getOperand(0);
2768+
// The trunc should have been the only user of the real shift amount.
2769+
if (!checkOneUse(N1))
2770+
return false;
2771+
}
2772+
// Match the shift amount as: (bitwidth - y). It should go away, too.
2773+
if (N1.getOpcode() != ISD::SUB)
2774+
return false;
2775+
auto N10 = dyn_cast<ConstantSDNode>(N1.getOperand(0));
2776+
if (!N10 || N10->getZExtValue() != Size)
2777+
return false;
2778+
X = N0->getOperand(0);
2779+
NBits = N1.getOperand(1);
2780+
return true;
2781+
};
2782+
27432783
auto matchLowBitMask = [&matchPatternA,
27442784
&matchPatternB](SDValue Mask) -> bool {
2745-
// FIXME: patterns c, d.
2785+
// FIXME: pattern c.
27462786
return matchPatternA(Mask) || matchPatternB(Mask);
27472787
};
27482788

2749-
SDValue X = Node->getOperand(0);
2750-
SDValue Mask = Node->getOperand(1);
2789+
if (Node->getOpcode() == ISD::AND) {
2790+
X = Node->getOperand(0);
2791+
SDValue Mask = Node->getOperand(1);
27512792

2752-
if (matchLowBitMask(Mask)) {
2753-
// Great.
2754-
} else {
2755-
std::swap(X, Mask);
2756-
if (!matchLowBitMask(Mask))
2757-
return false;
2758-
}
2793+
if (matchLowBitMask(Mask)) {
2794+
// Great.
2795+
} else {
2796+
std::swap(X, Mask);
2797+
if (!matchLowBitMask(Mask))
2798+
return false;
2799+
}
2800+
} else if (!matchPatternD(Node))
2801+
return false;
27592802

27602803
SDLoc DL(Node);
27612804

2805+
SDValue OrigNBits = NBits;
2806+
// Do we need to truncate the shift amount?
2807+
if (NBits.getValueType() != MVT::i8) {
2808+
NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits);
2809+
insertDAGNode(*CurDAG, OrigNBits, NBits);
2810+
}
2811+
27622812
// Insert 8-bit NBits into lowest 8 bits of NVT-sized (32 or 64-bit) register.
27632813
// All the other bits are undefined, we do not care about them.
27642814
SDValue ImplDef =
27652815
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, NVT), 0);
27662816
insertDAGNode(*CurDAG, NBits, ImplDef);
2767-
SDValue OrigNBits = NBits;
27682817
NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, NVT, ImplDef, NBits);
27692818
insertDAGNode(*CurDAG, OrigNBits, NBits);
27702819

@@ -2963,17 +3012,8 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
29633012
if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
29643013
ShiftAmt = ShiftAmt->getOperand(0);
29653014

2966-
// Special case to avoid messing up a BZHI pattern.
2967-
// Look for (srl (shl X, (size - y)), (size - y)
2968-
if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) &&
2969-
N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL &&
2970-
// Shift amounts the same?
2971-
N->getOperand(1) == N->getOperand(0).getOperand(1) &&
2972-
// Shift amounts size - y?
2973-
ShiftAmt.getOpcode() == ISD::SUB &&
2974-
isa<ConstantSDNode>(ShiftAmt.getOperand(0)) &&
2975-
cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size)
2976-
return false;
3015+
// This function is called after X86DAGToDAGISel::matchBitExtract(),
3016+
// so we are not afraid that we might mess up BZHI/BEXTR pattern.
29773017

29783018
SDValue NewShiftAmt;
29793019
if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
@@ -3172,6 +3212,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
31723212
}
31733213

31743214
case ISD::SRL:
3215+
if (matchBitExtract(Node))
3216+
return;
3217+
LLVM_FALLTHROUGH;
31753218
case ISD::SRA:
31763219
case ISD::SHL:
31773220
if (tryShiftAmountMod(Node))

lib/Target/X86/X86InstrInfo.td

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2519,14 +2519,6 @@ let Predicates = [HasBMI2] in {
25192519
(and (x86memop addr:$src),
25202520
(srl -1, (sub bitwidth, GR8:$lz))),
25212521
RC, VT, DstInst, DstMemInst>;
2522-
2523-
// x << (bitwidth - y) >> (bitwidth - y)
2524-
defm : _bmi_bzhi_pattern<(srl (shl RC:$src, (sub bitwidth, GR8:$lz)),
2525-
(sub bitwidth, GR8:$lz)),
2526-
(srl (shl (x86memop addr:$src),
2527-
(sub bitwidth, GR8:$lz)),
2528-
(sub bitwidth, GR8:$lz)),
2529-
RC, VT, DstInst, DstMemInst>;
25302522
}
25312523

25322524
defm : bmi_bzhi_patterns<GR32, 32, i32, BZHI32rr, loadi32, BZHI32rm>;
@@ -2545,24 +2537,6 @@ let Predicates = [HasBMI2] in {
25452537
def : Pat<(and (loadi64 addr:$src), (srl -1, (i8 (trunc (sub 64, GR32:$lz))))),
25462538
(BZHI64rm addr:$src,
25472539
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
2548-
2549-
// x << (32 - y) >> (32 - y)
2550-
def : Pat<(srl (shl GR32:$src, (i8 (trunc (sub 32, GR32:$lz)))),
2551-
(i8 (trunc (sub 32, GR32:$lz)))),
2552-
(BZHI32rr GR32:$src, GR32:$lz)>;
2553-
def : Pat<(srl (shl (loadi32 addr:$src), (i8 (trunc (sub 32, GR32:$lz)))),
2554-
(i8 (trunc (sub 32, GR32:$lz)))),
2555-
(BZHI32rm addr:$src, GR32:$lz)>;
2556-
2557-
// x << (64 - y) >> (64 - y)
2558-
def : Pat<(srl (shl GR64:$src, (i8 (trunc (sub 64, GR32:$lz)))),
2559-
(i8 (trunc (sub 64, GR32:$lz)))),
2560-
(BZHI64rr GR64:$src,
2561-
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
2562-
def : Pat<(srl (shl (loadi64 addr:$src), (i8 (trunc (sub 64, GR32:$lz)))),
2563-
(i8 (trunc (sub 64, GR32:$lz)))),
2564-
(BZHI64rm addr:$src,
2565-
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$lz, sub_32bit))>;
25662540
} // HasBMI2
25672541

25682542
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,

0 commit comments

Comments
 (0)