Skip to content

Commit d1889cf

Browse files
authored
[X86] combineX86ShuffleChain - provide list of combined shuffle nodes, replace HasVariableMask bool arg. NFC. (#127826)
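Minor NFC refactor in preparation for making better variable-mask combining decisions: isTargetShuffleVariableMask doesn't discriminate between fast (AND, PSHUFB, etc.) and slow (VPERMV3, etc.) variable shuffles, so an opaque HasVariableMask flag is of only limited use. Passing the list of combined shuffle nodes (SrcNodes) instead lets each callee inspect the individual opcodes and derive HasVariableMask itself.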
1 parent a96444a commit d1889cf

File tree

1 file changed

+24
-23
lines changed

1 file changed

+24
-23
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -39580,7 +39580,7 @@ static bool matchBinaryPermuteShuffle(
3958039580

3958139581
static SDValue combineX86ShuffleChainWithExtract(
3958239582
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
39583-
bool HasVariableMask, bool AllowVariableCrossLaneMask,
39583+
ArrayRef<const SDNode *> SrcNodes, bool AllowVariableCrossLaneMask,
3958439584
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
3958539585
const X86Subtarget &Subtarget);
3958639586

@@ -39595,7 +39595,7 @@ static SDValue combineX86ShuffleChainWithExtract(
3959539595
/// instruction but should only be used to replace chains over a certain depth.
3959639596
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3959739597
ArrayRef<int> BaseMask, int Depth,
39598-
bool HasVariableMask,
39598+
ArrayRef<const SDNode *> SrcNodes,
3959939599
bool AllowVariableCrossLaneMask,
3960039600
bool AllowVariablePerLaneMask,
3960139601
SelectionDAG &DAG,
@@ -40064,6 +40064,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4006440064
if (Depth < 1)
4006540065
return SDValue();
4006640066

40067+
bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) {
40068+
return isTargetShuffleVariableMask(N->getOpcode());
40069+
});
40070+
4006740071
// Depth threshold above which we can efficiently use variable mask shuffles.
4006840072
int VariableCrossLaneShuffleDepth =
4006940073
Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
@@ -40134,9 +40138,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4013440138
// If that failed and either input is extracted then try to combine as a
4013540139
// shuffle with the larger type.
4013640140
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
40137-
Inputs, Root, BaseMask, Depth, HasVariableMask,
40138-
AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG,
40139-
Subtarget))
40141+
Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask,
40142+
AllowVariablePerLaneMask, DAG, Subtarget))
4014040143
return WideShuffle;
4014140144

4014240145
// If we have a dual input lane-crossing shuffle then lower to VPERMV3,
@@ -40307,8 +40310,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4030740310
// If that failed and either input is extracted then try to combine as a
4030840311
// shuffle with the larger type.
4030940312
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
40310-
Inputs, Root, BaseMask, Depth, HasVariableMask,
40311-
AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget))
40313+
Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask,
40314+
AllowVariablePerLaneMask, DAG, Subtarget))
4031240315
return WideShuffle;
4031340316

4031440317
// If we have a dual input shuffle then lower to VPERMV3,
@@ -40346,7 +40349,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4034640349
// extract_subvector(shuffle(x,y,m2),0)
4034740350
static SDValue combineX86ShuffleChainWithExtract(
4034840351
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
40349-
bool HasVariableMask, bool AllowVariableCrossLaneMask,
40352+
ArrayRef<const SDNode *> SrcNodes, bool AllowVariableCrossLaneMask,
4035040353
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
4035140354
const X86Subtarget &Subtarget) {
4035240355
unsigned NumMaskElts = BaseMask.size();
@@ -40475,7 +40478,7 @@ static SDValue combineX86ShuffleChainWithExtract(
4047540478

4047640479
if (SDValue WideShuffle =
4047740480
combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
40478-
HasVariableMask, AllowVariableCrossLaneMask,
40481+
SrcNodes, AllowVariableCrossLaneMask,
4047940482
AllowVariablePerLaneMask, DAG, Subtarget)) {
4048040483
WideShuffle =
4048140484
extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
@@ -40698,7 +40701,7 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
4069840701
// TODO: Extend this to merge multiple constant Ops and update the mask.
4069940702
static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
4070040703
ArrayRef<int> Mask,
40701-
bool HasVariableMask,
40704+
ArrayRef<const SDNode *> SrcNodes,
4070240705
SelectionDAG &DAG, const SDLoc &DL,
4070340706
const X86Subtarget &Subtarget) {
4070440707
unsigned SizeInBits = VT.getSizeInBits();
@@ -40720,6 +40723,9 @@ static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
4072040723
// only used once or the combined shuffle has included a variable mask
4072140724
// shuffle, this is to avoid constant pool bloat.
4072240725
bool IsOptimizingSize = DAG.shouldOptForSize();
40726+
bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) {
40727+
return isTargetShuffleVariableMask(N->getOpcode());
40728+
});
4072340729
if (IsOptimizingSize && !HasVariableMask &&
4072440730
llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); }))
4072540731
return SDValue();
@@ -40821,7 +40827,7 @@ namespace llvm {
4082140827
static SDValue combineX86ShufflesRecursively(
4082240828
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
4082340829
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
40824-
unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
40830+
unsigned MaxDepth, bool AllowVariableCrossLaneMask,
4082540831
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
4082640832
const X86Subtarget &Subtarget) {
4082740833
assert(!RootMask.empty() &&
@@ -40877,7 +40883,6 @@ static SDValue combineX86ShufflesRecursively(
4087740883
SmallVector<int, 64> OpMask;
4087840884
SmallVector<SDValue, 2> OpInputs;
4087940885
APInt OpUndef, OpZero;
40880-
bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
4088140886
if (getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
4088240887
OpZero, DAG, Depth, false)) {
4088340888
// Shuffle inputs must not be larger than the shuffle result.
@@ -41092,7 +41097,6 @@ static SDValue combineX86ShufflesRecursively(
4109241097
return getOnesVector(RootVT, DAG, DL);
4109341098

4109441099
assert(!Ops.empty() && "Shuffle with no inputs detected");
41095-
HasVariableMask |= IsOpVariableMask;
4109641100

4109741101
// Update the list of shuffle nodes that have been combined so far.
4109841102
SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes);
@@ -41121,15 +41125,14 @@ static SDValue combineX86ShufflesRecursively(
4112141125
}
4112241126
if (SDValue Res = combineX86ShufflesRecursively(
4112341127
Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
41124-
HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
41125-
Subtarget))
41128+
AllowCrossLaneVar, AllowPerLaneVar, DAG, Subtarget))
4112641129
return Res;
4112741130
}
4112841131
}
4112941132

4113041133
// Attempt to constant fold all of the constant source ops.
4113141134
if (SDValue Cst = combineX86ShufflesConstants(
41132-
RootVT, Ops, Mask, HasVariableMask, DAG, DL, Subtarget))
41135+
RootVT, Ops, Mask, CombinedNodes, DAG, DL, Subtarget))
4113341136
return Cst;
4113441137

4113541138
// If constant fold failed and we only have constants - then we have
@@ -41231,7 +41234,7 @@ static SDValue combineX86ShufflesRecursively(
4123141234

4123241235
// Try to combine into a single shuffle instruction.
4123341236
if (SDValue Shuffle = combineX86ShuffleChain(
41234-
Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
41237+
Ops, Root, Mask, Depth, CombinedNodes, AllowVariableCrossLaneMask,
4123541238
AllowVariablePerLaneMask, DAG, Subtarget))
4123641239
return Shuffle;
4123741240

@@ -41250,7 +41253,7 @@ static SDValue combineX86ShufflesRecursively(
4125041253
// If that failed and any input is extracted then try to combine as a
4125141254
// shuffle with the larger type.
4125241255
return combineX86ShuffleChainWithExtract(
41253-
Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask,
41256+
Ops, Root, Mask, Depth, CombinedNodes, AllowVariableCrossLaneMask,
4125441257
AllowVariablePerLaneMask, DAG, Subtarget);
4125541258
}
4125641259

@@ -41259,7 +41262,6 @@ static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
4125941262
const X86Subtarget &Subtarget) {
4126041263
return combineX86ShufflesRecursively(
4126141264
{Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41262-
/*HasVarMask*/ false,
4126341265
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG,
4126441266
Subtarget);
4126541267
}
@@ -41897,7 +41899,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4189741899
if (SDValue Res = combineX86ShufflesRecursively(
4189841900
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
4189941901
X86::MaxShuffleCombineDepth,
41900-
/*HasVarMask*/ false, /*AllowCrossLaneVarMask*/ true,
41902+
/*AllowCrossLaneVarMask*/ true,
4190141903
/*AllowPerLaneVarMask*/ true, DAG, Subtarget))
4190241904
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
4190341905
DAG.getBitcast(SrcVT, Res));
@@ -42236,7 +42238,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4223642238
llvm::narrowShuffleMaskElts(EltBits / 8, Mask, ByteMask);
4223742239
if (SDValue NewMask = combineX86ShufflesConstants(
4223842240
ShufVT, {MaskLHS, MaskRHS}, ByteMask,
42239-
/*HasVariableMask=*/true, DAG, DL, Subtarget)) {
42241+
{LHS.getNode(), RHS.getNode()}, DAG, DL, Subtarget)) {
4224042242
SDValue NewLHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
4224142243
LHS.getOperand(0), NewMask);
4224242244
SDValue NewRHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
@@ -43871,7 +43873,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4387143873

4387243874
SDValue NewShuffle = combineX86ShufflesRecursively(
4387343875
{Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
43874-
/*HasVarMask*/ false,
4387543876
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
4387643877
Subtarget);
4387743878
if (NewShuffle)
@@ -51430,7 +51431,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
5143051431
if (SDValue Shuffle = combineX86ShufflesRecursively(
5143151432
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
5143251433
X86::MaxShuffleCombineDepth,
51433-
/*HasVarMask*/ false, /*AllowVarCrossLaneMask*/ true,
51434+
/*AllowVarCrossLaneMask*/ true,
5143451435
/*AllowVarPerLaneMask*/ true, DAG, Subtarget))
5143551436
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
5143651437
N0.getOperand(1));

0 commit comments

Comments
 (0)