@@ -4975,12 +4975,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4975
4975
4976
4976
// As a backup, shuffles can be lowered via a vrgather instruction, possibly
4977
4977
// merged with a second vrgather.
4978
- SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
4979
-
4980
- // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
4981
- // half.
4982
- DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
4983
-
4978
+ SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
4984
4979
SmallVector<SDValue> MaskVals;
4985
4980
4986
4981
// Now construct the mask that will be used by the blended vrgather operation.
@@ -4989,28 +4984,20 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
4989
4984
bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
4990
4985
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4991
4986
bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
4992
- GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4993
- ? DAG.getConstant(MaskIndex, DL, XLenVT)
4994
- : DAG.getUNDEF(XLenVT));
4995
- GatherIndicesRHS.push_back(
4996
- IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
4997
- : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
4998
- if (IsLHSOrUndefIndex && MaskIndex >= 0)
4999
- ++LHSIndexCounts[MaskIndex];
5000
- if (!IsLHSOrUndefIndex)
5001
- ++RHSIndexCounts[MaskIndex - NumElts];
4987
+ ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
4988
+ ? MaskIndex : -1);
4989
+ ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5002
4990
}
5003
4991
5004
4992
if (SwapOps) {
5005
4993
std::swap(V1, V2);
5006
- std::swap(GatherIndicesLHS, GatherIndicesRHS );
4994
+ std::swap(ShuffleMaskLHS, ShuffleMaskRHS );
5007
4995
}
5008
4996
5009
4997
assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5010
4998
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5011
4999
SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5012
5000
5013
- unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
5014
5001
unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5015
5002
MVT IndexVT = VT.changeTypeToInteger();
5016
5003
// Since we can't introduce illegal index types at this stage, use i16 and
@@ -5038,6 +5025,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5038
5025
// are handled above.
5039
5026
if (V2.isUndef()) {
5040
5027
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5028
+ SmallVector<SDValue> GatherIndicesLHS;
5029
+ for (int ShuffleIdx : ShuffleMaskLHS)
5030
+ GatherIndicesLHS.push_back(ShuffleIdx != -1
5031
+ ? DAG.getConstant(ShuffleIdx, DL, XLenVT)
5032
+ : DAG.getUNDEF(XLenVT));
5041
5033
SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5042
5034
LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5043
5035
Subtarget);
@@ -5046,50 +5038,13 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5046
5038
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5047
5039
}
5048
5040
5049
- // Translate the gather index we computed above (and possibly swapped)
5050
- // back to a shuffle mask. This step should disappear once we complete
5051
- // the migration to recursive design.
5052
- SmallVector<int> ShuffleMaskLHS;
5053
- ShuffleMaskLHS.reserve(GatherIndicesLHS.size());
5054
- for (SDValue GatherIndex : GatherIndicesLHS) {
5055
- if (GatherIndex.isUndef()) {
5056
- ShuffleMaskLHS.push_back(-1);
5057
- continue;
5058
- }
5059
- auto *IdxC = cast<ConstantSDNode>(GatherIndex);
5060
- ShuffleMaskLHS.push_back(IdxC->getZExtValue());
5061
- }
5062
-
5063
- // Recursively invoke lowering for the LHS as if there were no RHS.
5064
- // This allows us to leverage all of our single source permute tricks.
5065
- SDValue Gather =
5066
- DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5067
- Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget);
5068
-
5069
- // Blend in second vector source with an additional vrgather.
5070
- V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5071
-
5072
- MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
5073
- SelectMask =
5074
- convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
5075
-
5076
- // If only one index is used, we can use a "splat" vrgather.
5077
- // TODO: We can splat the most-common index and fix-up any stragglers, if
5078
- // that's beneficial.
5079
- if (RHSIndexCounts.size() == 1) {
5080
- int SplatIndex = RHSIndexCounts.begin()->getFirst();
5081
- Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
5082
- DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
5083
- SelectMask, VL);
5084
- } else {
5085
- SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
5086
- RHSIndices =
5087
- convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
5088
- Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
5089
- SelectMask, VL);
5090
- }
5091
-
5092
- return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5041
+ // Recursively invoke lowering for each operand as if we had two
5042
+ // independent single source permutes, and then combine the result via a
5043
+ // vselect. Note that the vselect will likely be folded back into the
5044
+ // second permute (vrgather, or other) by the post-isel combine.
5045
+ V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5046
+ V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5047
+ return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5093
5048
}
5094
5049
5095
5050
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
0 commit comments