@@ -39580,7 +39580,7 @@ static bool matchBinaryPermuteShuffle(
39580
39580
39581
39581
static SDValue combineX86ShuffleChainWithExtract(
39582
39582
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
39583
- bool HasVariableMask , bool AllowVariableCrossLaneMask,
39583
+ ArrayRef<const SDNode *> SrcNodes , bool AllowVariableCrossLaneMask,
39584
39584
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
39585
39585
const X86Subtarget &Subtarget);
39586
39586
@@ -39595,7 +39595,7 @@ static SDValue combineX86ShuffleChainWithExtract(
39595
39595
/// instruction but should only be used to replace chains over a certain depth.
39596
39596
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
39597
39597
ArrayRef<int> BaseMask, int Depth,
39598
- bool HasVariableMask ,
39598
+ ArrayRef<const SDNode *> SrcNodes ,
39599
39599
bool AllowVariableCrossLaneMask,
39600
39600
bool AllowVariablePerLaneMask,
39601
39601
SelectionDAG &DAG,
@@ -40064,6 +40064,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
40064
40064
if (Depth < 1)
40065
40065
return SDValue();
40066
40066
40067
+ bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) {
40068
+ return isTargetShuffleVariableMask(N->getOpcode());
40069
+ });
40070
+
40067
40071
// Depth threshold above which we can efficiently use variable mask shuffles.
40068
40072
int VariableCrossLaneShuffleDepth =
40069
40073
Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2;
@@ -40134,9 +40138,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
40134
40138
// If that failed and either input is extracted then try to combine as a
40135
40139
// shuffle with the larger type.
40136
40140
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
40137
- Inputs, Root, BaseMask, Depth, HasVariableMask,
40138
- AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG,
40139
- Subtarget))
40141
+ Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask,
40142
+ AllowVariablePerLaneMask, DAG, Subtarget))
40140
40143
return WideShuffle;
40141
40144
40142
40145
// If we have a dual input lane-crossing shuffle then lower to VPERMV3,
@@ -40307,8 +40310,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
40307
40310
// If that failed and either input is extracted then try to combine as a
40308
40311
// shuffle with the larger type.
40309
40312
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
40310
- Inputs, Root, BaseMask, Depth, HasVariableMask ,
40311
- AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget))
40313
+ Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask ,
40314
+ AllowVariablePerLaneMask, DAG, Subtarget))
40312
40315
return WideShuffle;
40313
40316
40314
40317
// If we have a dual input shuffle then lower to VPERMV3,
@@ -40346,7 +40349,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
40346
40349
// extract_subvector(shuffle(x,y,m2),0)
40347
40350
static SDValue combineX86ShuffleChainWithExtract(
40348
40351
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
40349
- bool HasVariableMask , bool AllowVariableCrossLaneMask,
40352
+ ArrayRef<const SDNode *> SrcNodes , bool AllowVariableCrossLaneMask,
40350
40353
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
40351
40354
const X86Subtarget &Subtarget) {
40352
40355
unsigned NumMaskElts = BaseMask.size();
@@ -40475,7 +40478,7 @@ static SDValue combineX86ShuffleChainWithExtract(
40475
40478
40476
40479
if (SDValue WideShuffle =
40477
40480
combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
40478
- HasVariableMask , AllowVariableCrossLaneMask,
40481
+ SrcNodes , AllowVariableCrossLaneMask,
40479
40482
AllowVariablePerLaneMask, DAG, Subtarget)) {
40480
40483
WideShuffle =
40481
40484
extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
@@ -40698,7 +40701,7 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
40698
40701
// TODO: Extend this to merge multiple constant Ops and update the mask.
40699
40702
static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
40700
40703
ArrayRef<int> Mask,
40701
- bool HasVariableMask ,
40704
+ ArrayRef<const SDNode *> SrcNodes ,
40702
40705
SelectionDAG &DAG, const SDLoc &DL,
40703
40706
const X86Subtarget &Subtarget) {
40704
40707
unsigned SizeInBits = VT.getSizeInBits();
@@ -40720,6 +40723,9 @@ static SDValue combineX86ShufflesConstants(MVT VT, ArrayRef<SDValue> Ops,
40720
40723
// only used once or the combined shuffle has included a variable mask
40721
40724
// shuffle, this is to avoid constant pool bloat.
40722
40725
bool IsOptimizingSize = DAG.shouldOptForSize();
40726
+ bool HasVariableMask = llvm::any_of(SrcNodes, [](const SDNode *N) {
40727
+ return isTargetShuffleVariableMask(N->getOpcode());
40728
+ });
40723
40729
if (IsOptimizingSize && !HasVariableMask &&
40724
40730
llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); }))
40725
40731
return SDValue();
@@ -40821,7 +40827,7 @@ namespace llvm {
40821
40827
static SDValue combineX86ShufflesRecursively(
40822
40828
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
40823
40829
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
40824
- unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
40830
+ unsigned MaxDepth, bool AllowVariableCrossLaneMask,
40825
40831
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
40826
40832
const X86Subtarget &Subtarget) {
40827
40833
assert(!RootMask.empty() &&
@@ -40877,7 +40883,6 @@ static SDValue combineX86ShufflesRecursively(
40877
40883
SmallVector<int, 64> OpMask;
40878
40884
SmallVector<SDValue, 2> OpInputs;
40879
40885
APInt OpUndef, OpZero;
40880
- bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
40881
40886
if (getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
40882
40887
OpZero, DAG, Depth, false)) {
40883
40888
// Shuffle inputs must not be larger than the shuffle result.
@@ -41092,7 +41097,6 @@ static SDValue combineX86ShufflesRecursively(
41092
41097
return getOnesVector(RootVT, DAG, DL);
41093
41098
41094
41099
assert(!Ops.empty() && "Shuffle with no inputs detected");
41095
- HasVariableMask |= IsOpVariableMask;
41096
41100
41097
41101
// Update the list of shuffle nodes that have been combined so far.
41098
41102
SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes);
@@ -41121,15 +41125,14 @@ static SDValue combineX86ShufflesRecursively(
41121
41125
}
41122
41126
if (SDValue Res = combineX86ShufflesRecursively(
41123
41127
Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth,
41124
- HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG,
41125
- Subtarget))
41128
+ AllowCrossLaneVar, AllowPerLaneVar, DAG, Subtarget))
41126
41129
return Res;
41127
41130
}
41128
41131
}
41129
41132
41130
41133
// Attempt to constant fold all of the constant source ops.
41131
41134
if (SDValue Cst = combineX86ShufflesConstants(
41132
- RootVT, Ops, Mask, HasVariableMask , DAG, DL, Subtarget))
41135
+ RootVT, Ops, Mask, CombinedNodes , DAG, DL, Subtarget))
41133
41136
return Cst;
41134
41137
41135
41138
// If constant fold failed and we only have constants - then we have
@@ -41231,7 +41234,7 @@ static SDValue combineX86ShufflesRecursively(
41231
41234
41232
41235
// Try to combine into a single shuffle instruction.
41233
41236
if (SDValue Shuffle = combineX86ShuffleChain(
41234
- Ops, Root, Mask, Depth, HasVariableMask , AllowVariableCrossLaneMask,
41237
+ Ops, Root, Mask, Depth, CombinedNodes , AllowVariableCrossLaneMask,
41235
41238
AllowVariablePerLaneMask, DAG, Subtarget))
41236
41239
return Shuffle;
41237
41240
@@ -41250,7 +41253,7 @@ static SDValue combineX86ShufflesRecursively(
41250
41253
// If that failed and any input is extracted then try to combine as a
41251
41254
// shuffle with the larger type.
41252
41255
return combineX86ShuffleChainWithExtract(
41253
- Ops, Root, Mask, Depth, HasVariableMask , AllowVariableCrossLaneMask,
41256
+ Ops, Root, Mask, Depth, CombinedNodes , AllowVariableCrossLaneMask,
41254
41257
AllowVariablePerLaneMask, DAG, Subtarget);
41255
41258
}
41256
41259
@@ -41259,7 +41262,6 @@ static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
41259
41262
const X86Subtarget &Subtarget) {
41260
41263
return combineX86ShufflesRecursively(
41261
41264
{Op}, 0, Op, {0}, {}, /*Depth*/ 0, X86::MaxShuffleCombineDepth,
41262
- /*HasVarMask*/ false,
41263
41265
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, DAG,
41264
41266
Subtarget);
41265
41267
}
@@ -41897,7 +41899,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
41897
41899
if (SDValue Res = combineX86ShufflesRecursively(
41898
41900
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
41899
41901
X86::MaxShuffleCombineDepth,
41900
- /*HasVarMask*/ false, /* AllowCrossLaneVarMask*/ true,
41902
+ /*AllowCrossLaneVarMask*/ true,
41901
41903
/*AllowPerLaneVarMask*/ true, DAG, Subtarget))
41902
41904
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
41903
41905
DAG.getBitcast(SrcVT, Res));
@@ -42236,7 +42238,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
42236
42238
llvm::narrowShuffleMaskElts(EltBits / 8, Mask, ByteMask);
42237
42239
if (SDValue NewMask = combineX86ShufflesConstants(
42238
42240
ShufVT, {MaskLHS, MaskRHS}, ByteMask,
42239
- /*HasVariableMask=*/true , DAG, DL, Subtarget)) {
42241
+ {LHS.getNode(), RHS.getNode()} , DAG, DL, Subtarget)) {
42240
42242
SDValue NewLHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
42241
42243
LHS.getOperand(0), NewMask);
42242
42244
SDValue NewRHS = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT,
@@ -43871,7 +43873,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
43871
43873
43872
43874
SDValue NewShuffle = combineX86ShufflesRecursively(
43873
43875
{Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth,
43874
- /*HasVarMask*/ false,
43875
43876
/*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG,
43876
43877
Subtarget);
43877
43878
if (NewShuffle)
@@ -51430,7 +51431,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
51430
51431
if (SDValue Shuffle = combineX86ShufflesRecursively(
51431
51432
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
51432
51433
X86::MaxShuffleCombineDepth,
51433
- /*HasVarMask*/ false, /* AllowVarCrossLaneMask*/ true,
51434
+ /*AllowVarCrossLaneMask*/ true,
51434
51435
/*AllowVarPerLaneMask*/ true, DAG, Subtarget))
51435
51436
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Shuffle,
51436
51437
N0.getOperand(1));
0 commit comments