@@ -2688,6 +2688,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
2688
2688
// c) x & (-1 >> (32 - y))
2689
2689
// d) x << (32 - y) >> (32 - y)
2690
2690
bool X86DAGToDAGISel::matchBitExtract (SDNode *Node) {
2691
+ assert (
2692
+ (Node->getOpcode () == ISD::AND || Node->getOpcode () == ISD::SRL) &&
2693
+ " Should be either an and-mask, or right-shift after clearing high bits." );
2694
+
2691
2695
// BEXTR is a BMI instruction, BZHI is a BMI2 instruction. We need at least one of them.
2692
2696
if (!Subtarget->hasBMI () && !Subtarget->hasBMI2 ())
2693
2697
return false ;
@@ -2698,13 +2702,16 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2698
2702
if (NVT != MVT::i32 && NVT != MVT::i64 )
2699
2703
return false ;
2700
2704
2705
+ unsigned Size = NVT.getSizeInBits ();
2706
+
2701
2707
SDValue NBits;
2702
2708
2703
2709
// If we have BMI2's BZHI, we are ok with multi-use patterns.
2704
2710
// Else, if we only have BMI1's BEXTR, we require one-use.
2705
2711
const bool CanHaveExtraUses = Subtarget->hasBMI2 ();
2706
- auto checkOneUse = [CanHaveExtraUses](SDValue Op) {
2707
- return CanHaveExtraUses || Op.hasOneUse ();
2712
+ auto checkOneUse = [CanHaveExtraUses](SDValue Op, unsigned NUses = 1 ) {
2713
+ return CanHaveExtraUses ||
2714
+ Op.getNode ()->hasNUsesOfValue (NUses, Op.getResNo ());
2708
2715
};
2709
2716
2710
2717
// a) x & ((1 << nbits) + (-1))
@@ -2740,31 +2747,73 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
2740
2747
return true ;
2741
2748
};
2742
2749
2750
+ SDValue X;
2751
+
2752
+ // d) x << (32 - y) >> (32 - y)
2753
+ auto matchPatternD = [&checkOneUse, Size, &X, &NBits](SDNode *Node) -> bool {
2754
+ if (Node->getOpcode () != ISD::SRL)
2755
+ return false ;
2756
+ SDValue N0 = Node->getOperand (0 );
2757
+ if (N0->getOpcode () != ISD::SHL || !checkOneUse (N0))
2758
+ return false ;
2759
+ SDValue N1 = Node->getOperand (1 );
2760
+ SDValue N01 = N0->getOperand (1 );
2761
+ // Both of the shifts must be by the exact same value.
2762
+ // There should not be any uses of the shift amount outside of the pattern.
2763
+ if (N1 != N01 || !checkOneUse (N1, 2 ))
2764
+ return false ;
2765
+ // Skip over a truncate of the shift amount.
2766
+ if (N1->getOpcode () == ISD::TRUNCATE) {
2767
+ N1 = N1->getOperand (0 );
2768
+ // The trunc should have been the only user of the real shift amount.
2769
+ if (!checkOneUse (N1))
2770
+ return false ;
2771
+ }
2772
+ // Match the shift amount as: (bitwidth - y). It should go away, too.
2773
+ if (N1.getOpcode () != ISD::SUB)
2774
+ return false ;
2775
+ auto N10 = dyn_cast<ConstantSDNode>(N1.getOperand (0 ));
2776
+ if (!N10 || N10->getZExtValue () != Size)
2777
+ return false ;
2778
+ X = N0->getOperand (0 );
2779
+ NBits = N1.getOperand (1 );
2780
+ return true ;
2781
+ };
2782
+
2743
2783
auto matchLowBitMask = [&matchPatternA,
2744
2784
&matchPatternB](SDValue Mask) -> bool {
2745
- // FIXME: patterns c, d .
2785
+ // FIXME: pattern c .
2746
2786
return matchPatternA (Mask) || matchPatternB (Mask);
2747
2787
};
2748
2788
2749
- SDValue X = Node->getOperand (0 );
2750
- SDValue Mask = Node->getOperand (1 );
2789
+ if (Node->getOpcode () == ISD::AND) {
2790
+ X = Node->getOperand (0 );
2791
+ SDValue Mask = Node->getOperand (1 );
2751
2792
2752
- if (matchLowBitMask (Mask)) {
2753
- // Great.
2754
- } else {
2755
- std::swap (X, Mask);
2756
- if (!matchLowBitMask (Mask))
2757
- return false ;
2758
- }
2793
+ if (matchLowBitMask (Mask)) {
2794
+ // Great.
2795
+ } else {
2796
+ std::swap (X, Mask);
2797
+ if (!matchLowBitMask (Mask))
2798
+ return false ;
2799
+ }
2800
+ } else if (!matchPatternD (Node))
2801
+ return false ;
2759
2802
2760
2803
SDLoc DL (Node);
2761
2804
2805
+ SDValue OrigNBits = NBits;
2806
+ // Do we need to truncate the shift amount?
2807
+ if (NBits.getValueType () != MVT::i8 ) {
2808
+ NBits = CurDAG->getNode (ISD::TRUNCATE, DL, MVT::i8 , NBits);
2809
+ insertDAGNode (*CurDAG, OrigNBits, NBits);
2810
+ }
2811
+
2762
2812
// Insert 8-bit NBits into lowest 8 bits of NVT-sized (32 or 64-bit) register.
2763
2813
// All the other bits are undefined, we do not care about them.
2764
2814
SDValue ImplDef =
2765
2815
SDValue (CurDAG->getMachineNode (TargetOpcode::IMPLICIT_DEF, DL, NVT), 0 );
2766
2816
insertDAGNode (*CurDAG, NBits, ImplDef);
2767
- SDValue OrigNBits = NBits;
2768
2817
NBits = CurDAG->getTargetInsertSubreg (X86::sub_8bit, DL, NVT, ImplDef, NBits);
2769
2818
insertDAGNode (*CurDAG, OrigNBits, NBits);
2770
2819
@@ -2963,17 +3012,8 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
2963
3012
if (ShiftAmt->getOpcode () == ISD::TRUNCATE)
2964
3013
ShiftAmt = ShiftAmt->getOperand (0 );
2965
3014
2966
- // Special case to avoid messing up a BZHI pattern.
2967
- // Look for (srl (shl X, (size - y)), (size - y)
2968
- if (Subtarget->hasBMI2 () && (VT == MVT::i32 || VT == MVT::i64 ) &&
2969
- N->getOpcode () == ISD::SRL && N->getOperand (0 ).getOpcode () == ISD::SHL &&
2970
- // Shift amounts the same?
2971
- N->getOperand (1 ) == N->getOperand (0 ).getOperand (1 ) &&
2972
- // Shift amounts size - y?
2973
- ShiftAmt.getOpcode () == ISD::SUB &&
2974
- isa<ConstantSDNode>(ShiftAmt.getOperand (0 )) &&
2975
- cast<ConstantSDNode>(ShiftAmt.getOperand (0 ))->getZExtValue () == Size)
2976
- return false ;
3015
+ // This function is called after X86DAGToDAGISel::matchBitExtract(),
3016
// so there is no risk of messing up a BZHI/BEXTR pattern here.
2977
3017
2978
3018
SDValue NewShiftAmt;
2979
3019
if (ShiftAmt->getOpcode () == ISD::ADD || ShiftAmt->getOpcode () == ISD::SUB) {
@@ -3172,6 +3212,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
3172
3212
}
3173
3213
3174
3214
case ISD::SRL:
3215
+ if (matchBitExtract (Node))
3216
+ return ;
3217
+ LLVM_FALLTHROUGH;
3175
3218
case ISD::SRA:
3176
3219
case ISD::SHL:
3177
3220
if (tryShiftAmountMod (Node))
0 commit comments