Skip to content

Commit e47cd46

Browse files
committed
[X86] combineX86ShuffleChain - pass IsMaskedShuffle flag as argument from combineX86ShufflesRecursively instead of computing it internally. NFC.
Prep work toward better handling of shuffle combining across different vector widths.
1 parent a93cda4 commit e47cd46

File tree

1 file changed

+21
-21
lines changed

1 file changed

+21
-21
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -39602,7 +39602,7 @@ static bool matchBinaryPermuteShuffle(
3960239602
static SDValue combineX86ShuffleChainWithExtract(
3960339603
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
3960439604
ArrayRef<const SDNode *> SrcNodes, bool AllowVariableCrossLaneMask,
39605-
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
39605+
bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG,
3960639606
const X86Subtarget &Subtarget);
3960739607

3960839608
/// Combine an arbitrary chain of shuffles into a single instruction if
@@ -39619,6 +39619,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3961939619
ArrayRef<const SDNode *> SrcNodes,
3962039620
bool AllowVariableCrossLaneMask,
3962139621
bool AllowVariablePerLaneMask,
39622+
bool IsMaskedShuffle,
3962239623
SelectionDAG &DAG,
3962339624
const X86Subtarget &Subtarget) {
3962439625
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
@@ -39666,17 +39667,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3966639667
(RootVT.isFloatingPoint() && Depth >= 1) ||
3966739668
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
3966839669

39669-
// Don't combine if we are a AVX512/EVEX target and the mask element size
39670-
// is different from the root element size - this would prevent writemasks
39671-
// from being reused.
39672-
bool IsMaskedShuffle = false;
39673-
if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) {
39674-
if (Root.hasOneUse() && Root->user_begin()->getOpcode() == ISD::VSELECT &&
39675-
Root->user_begin()->getOperand(0).getScalarValueSizeInBits() == 1) {
39676-
IsMaskedShuffle = true;
39677-
}
39678-
}
39679-
3968039670
// If we are shuffling a splat (and not introducing zeros) then we can just
3968139671
// use it directly. This works for smaller elements as well as they already
3968239672
// repeat across each mask element.
@@ -40167,7 +40157,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4016740157
// shuffle with the larger type.
4016840158
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
4016940159
Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask,
40170-
AllowVariablePerLaneMask, DAG, Subtarget))
40160+
AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget))
4017140161
return WideShuffle;
4017240162

4017340163
// If we have a dual input lane-crossing shuffle then lower to VPERMV3,
@@ -40339,7 +40329,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4033940329
// shuffle with the larger type.
4034040330
if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
4034140331
Inputs, Root, BaseMask, Depth, SrcNodes, AllowVariableCrossLaneMask,
40342-
AllowVariablePerLaneMask, DAG, Subtarget))
40332+
AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget))
4034340333
return WideShuffle;
4034440334

4034540335
// If we have a dual input shuffle then lower to VPERMV3,
@@ -40378,7 +40368,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
4037840368
static SDValue combineX86ShuffleChainWithExtract(
4037940369
ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
4038040370
ArrayRef<const SDNode *> SrcNodes, bool AllowVariableCrossLaneMask,
40381-
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
40371+
bool AllowVariablePerLaneMask, bool IsMaskedShuffle, SelectionDAG &DAG,
4038240372
const X86Subtarget &Subtarget) {
4038340373
unsigned NumMaskElts = BaseMask.size();
4038440374
unsigned NumInputs = Inputs.size();
@@ -40504,10 +40494,10 @@ static SDValue combineX86ShuffleChainWithExtract(
4050440494
assert(WideRoot.getValueSizeInBits() == WideSizeInBits &&
4050540495
"WideRootSize mismatch");
4050640496

40507-
if (SDValue WideShuffle =
40508-
combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
40509-
SrcNodes, AllowVariableCrossLaneMask,
40510-
AllowVariablePerLaneMask, DAG, Subtarget)) {
40497+
if (SDValue WideShuffle = combineX86ShuffleChain(
40498+
WideInputs, WideRoot, WideMask, Depth, SrcNodes,
40499+
AllowVariableCrossLaneMask, AllowVariablePerLaneMask, IsMaskedShuffle,
40500+
DAG, Subtarget)) {
4051140501
WideShuffle =
4051240502
extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
4051340503
return DAG.getBitcast(RootVT, WideShuffle);
@@ -41244,6 +41234,16 @@ static SDValue combineX86ShufflesRecursively(
4124441234
resolveTargetShuffleInputsAndMask(Ops, Mask);
4124541235
}
4124641236

41237+
// If we are an AVX512/EVEX target the mask element size should match the root
41238+
// element size to allow writemasks to be reused.
41239+
bool IsMaskedShuffle = false;
41240+
if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) {
41241+
if (Root.hasOneUse() && Root->user_begin()->getOpcode() == ISD::VSELECT &&
41242+
Root->user_begin()->getOperand(0).getScalarValueSizeInBits() == 1) {
41243+
IsMaskedShuffle = true;
41244+
}
41245+
}
41246+
4124741247
// We can only combine unary and binary shuffle mask cases.
4124841248
if (Ops.size() <= 2) {
4124941249
// Minor canonicalization of the accumulated shuffle mask to make it easier
@@ -41268,7 +41268,7 @@ static SDValue combineX86ShufflesRecursively(
4126841268
// Try to combine into a single shuffle instruction.
4126941269
if (SDValue Shuffle = combineX86ShuffleChain(
4127041270
Ops, Root, Mask, Depth, CombinedNodes, AllowVariableCrossLaneMask,
41271-
AllowVariablePerLaneMask, DAG, Subtarget))
41271+
AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget))
4127241272
return Shuffle;
4127341273

4127441274
// If all the operands come from the same larger vector, fallthrough and try
@@ -41287,7 +41287,7 @@ static SDValue combineX86ShufflesRecursively(
4128741287
// shuffle with the larger type.
4128841288
return combineX86ShuffleChainWithExtract(
4128941289
Ops, Root, Mask, Depth, CombinedNodes, AllowVariableCrossLaneMask,
41290-
AllowVariablePerLaneMask, DAG, Subtarget);
41290+
AllowVariablePerLaneMask, IsMaskedShuffle, DAG, Subtarget);
4129141291
}
4129241292

4129341293
/// Helper entry wrapper to combineX86ShufflesRecursively.

0 commit comments

Comments
 (0)