Skip to content

Commit 4981f8c

Browse files
authored
[PowerPC] Fix vector_shuffle combines when inputs are scalar_to_vector of differing types. (llvm#80784)
This patch fixes the combines for vector_shuffles when either or both of the left- and right-hand side inputs are scalar_to_vector nodes. Previously, when both the left- and right-hand side inputs were scalar_to_vector nodes, the combine could not handle the situation because the shuffle mask was updated incorrectly. As a temporary workaround, the combine was simply disabled and not performed in that case. This patch resolves the incorrect shuffle mask adjustments and also updates the test cases that are affected by this change. Patch migrated from https://reviews.llvm.org/D130487.
1 parent 3fcb968 commit 4981f8c

File tree

6 files changed

+836
-946
lines changed

6 files changed

+836
-946
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 94 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -15687,16 +15687,20 @@ static SDValue isScalarToVec(SDValue Op) {
1568715687
// On little endian, that's just the corresponding element in the other
1568815688
// half of the vector. On big endian, it is in the same half but right
1568915689
// justified rather than left justified in that half.
15690-
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15691-
int LHSMaxIdx, int RHSMinIdx,
15692-
int RHSMaxIdx, int HalfVec,
15693-
unsigned ValidLaneWidth,
15694-
const PPCSubtarget &Subtarget) {
15695-
for (int i = 0, e = ShuffV.size(); i < e; i++) {
15696-
int Idx = ShuffV[i];
15697-
if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15698-
ShuffV[i] +=
15699-
Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15690+
static void fixupShuffleMaskForPermutedSToV(
15691+
SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15692+
int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15693+
unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
15694+
int LHSEltFixup =
15695+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15696+
int RHSEltFixup =
15697+
Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15698+
for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15699+
int Idx = ShuffV[I];
15700+
if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15701+
ShuffV[I] += LHSEltFixup;
15702+
else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15703+
ShuffV[I] += RHSEltFixup;
1570015704
}
1570115705
}
1570215706

@@ -15735,6 +15739,51 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
1573515739
OrigSToV.getOperand(0));
1573615740
}
1573715741

15742+
static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15743+
int HalfVec, int LHSLastElementDefined,
15744+
int RHSLastElementDefined) {
15745+
for (int Index : ShuffV) {
15746+
if (Index < 0) // Skip explicitly undefined mask indices.
15747+
continue;
15748+
// Handle first input vector of the vector_shuffle.
15749+
if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15750+
(Index > LHSLastElementDefined))
15751+
return false;
15752+
// Handle second input vector of the vector_shuffle.
15753+
if ((RHSLastElementDefined >= 0) &&
15754+
(Index > HalfVec + RHSLastElementDefined))
15755+
return false;
15756+
}
15757+
return true;
15758+
}
15759+
15760+
static SDValue generateSToVPermutedForVecShuffle(
15761+
int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15762+
int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15763+
SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15764+
EVT VecShuffOperandType = VecShuffOperand.getValueType();
15765+
// Set up the values for the shuffle vector fixup.
15766+
NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15767+
// The last element depends on if the input comes from the LHS or RHS.
15768+
//
15769+
// For example:
15770+
// (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15771+
//
15772+
// For the LHS: The last element that comes from the LHS is actually 0, not 3
15773+
// because elements 1 and higher of a scalar_to_vector are undefined.
15774+
// For the RHS: The last element that comes from the RHS is actually 5, not 7
15775+
// because elements 1 and higher of a scalar_to_vector are undefined.
15776+
// It is also not 4 because the original scalar_to_vector is wider and
15777+
// actually contains two i32 elements.
15778+
LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15779+
? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15780+
: FirstElt;
15781+
SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
15782+
if (SToVPermuted.getValueType() != VecShuffOperandType)
15783+
SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15784+
return SToVPermuted;
15785+
}
15786+
1573815787
// On little endian subtargets, combine shuffles such as:
1573915788
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
1574015789
// into:
@@ -15782,71 +15831,64 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
1578215831
SDValue SToVLHS = isScalarToVec(LHS);
1578315832
SDValue SToVRHS = isScalarToVec(RHS);
1578415833
if (SToVLHS || SToVRHS) {
15785-
// FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15786-
// same type and have differing element sizes, then do not perform
15787-
// the following transformation. The current transformation for
15788-
// SCALAR_TO_VECTOR assumes that both input vectors have the same
15789-
// element size. This will be updated in the future to account for
15790-
// differing sizes of the LHS and RHS.
15791-
if (SToVLHS && SToVRHS &&
15792-
(SToVLHS.getValueType().getScalarSizeInBits() !=
15793-
SToVRHS.getValueType().getScalarSizeInBits()))
15794-
return Res;
15795-
15796-
int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15797-
: SToVRHS.getValueType().getVectorNumElements();
15798-
int NumEltsOut = ShuffV.size();
15834+
EVT VT = SVN->getValueType(0);
15835+
uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15836+
int ShuffleNumElts = ShuffV.size();
15837+
int HalfVec = ShuffleNumElts / 2;
1579915838
// The width of the "valid lane" (i.e. the lane that contains the value that
1580015839
// is vectorized) needs to be expressed in terms of the number of elements
1580115840
// of the shuffle. It is thereby the ratio of the values before and after
15802-
// any bitcast.
15803-
unsigned ValidLaneWidth =
15804-
SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15805-
LHS.getValueType().getScalarSizeInBits()
15806-
: SToVRHS.getValueType().getScalarSizeInBits() /
15807-
RHS.getValueType().getScalarSizeInBits();
15841+
// any bitcast, which will be set later on if the LHS or RHS are
15842+
// SCALAR_TO_VECTOR nodes.
15843+
unsigned LHSNumValidElts = HalfVec;
15844+
unsigned RHSNumValidElts = HalfVec;
1580815845

1580915846
// Initially assume that neither input is permuted. These will be adjusted
15810-
// accordingly if either input is.
15811-
int LHSMaxIdx = -1;
15812-
int RHSMinIdx = -1;
15813-
int RHSMaxIdx = -1;
15814-
int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15847+
// accordingly if either input is. Note, that -1 means that all elements
15848+
// are undefined.
15849+
int LHSFirstElt = 0;
15850+
int RHSFirstElt = ShuffleNumElts;
15851+
int LHSLastElt = -1;
15852+
int RHSLastElt = -1;
1581515853

1581615854
// Get the permuted scalar to vector nodes for the source(s) that come from
1581715855
// ISD::SCALAR_TO_VECTOR.
1581815856
// On big endian systems, this only makes sense for element sizes smaller
1581915857
// than 64 bits since for 64-bit elements, all instructions already put
1582015858
// the value into element zero. Since scalar size of LHS and RHS may differ
1582115859
// after isScalarToVec, this should be checked using their own sizes.
15860+
int LHSScalarSize = 0;
15861+
int RHSScalarSize = 0;
1582215862
if (SToVLHS) {
15823-
if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15863+
LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15864+
if (!IsLittleEndian && LHSScalarSize >= 64)
1582415865
return Res;
15825-
// Set up the values for the shuffle vector fixup.
15826-
LHSMaxIdx = NumEltsOut / NumEltsIn;
15827-
SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15828-
if (SToVLHS.getValueType() != LHS.getValueType())
15829-
SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15830-
LHS = SToVLHS;
1583115866
}
1583215867
if (SToVRHS) {
15833-
if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15868+
RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15869+
if (!IsLittleEndian && RHSScalarSize >= 64)
1583415870
return Res;
15835-
RHSMinIdx = NumEltsOut;
15836-
RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15837-
SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15838-
if (SToVRHS.getValueType() != RHS.getValueType())
15839-
SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15840-
RHS = SToVRHS;
1584115871
}
15872+
if (LHSScalarSize != 0)
15873+
LHS = generateSToVPermutedForVecShuffle(
15874+
LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15875+
LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15876+
if (RHSScalarSize != 0)
15877+
RHS = generateSToVPermutedForVecShuffle(
15878+
RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15879+
RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15880+
15881+
if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15882+
return Res;
1584215883

1584315884
// Fix up the shuffle mask to reflect where the desired element actually is.
1584415885
// The minimum and maximum indices that correspond to element zero for both
1584515886
// the LHS and RHS are computed and will control which shuffle mask entries
1584615887
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15847-
// entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15848-
fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15849-
HalfVec, ValidLaneWidth, Subtarget);
15888+
// entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15889+
fixupShuffleMaskForPermutedSToV(
15890+
ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15891+
LHSNumValidElts, RHSNumValidElts, Subtarget);
1585015892
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
1585115893

1585215894
// We may have simplified away the shuffle. We won't be able to do anything

llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2499,11 +2499,9 @@ define <2 x i64> @buildi2(i64 %arg, i32 %arg1) {
24992499
;
25002500
; CHECK-LE-LABEL: buildi2:
25012501
; CHECK-LE: # %bb.0: # %entry
2502-
; CHECK-LE-NEXT: mtfprd f0, r4
2502+
; CHECK-LE-NEXT: mtfprwz f0, r4
25032503
; CHECK-LE-NEXT: mtfprd f1, r3
2504-
; CHECK-LE-NEXT: xxswapd vs0, vs0
2505-
; CHECK-LE-NEXT: xxswapd v2, vs1
2506-
; CHECK-LE-NEXT: xxmrgld v2, v2, vs0
2504+
; CHECK-LE-NEXT: xxmrghd v2, vs1, vs0
25072505
; CHECK-LE-NEXT: blr
25082506
;
25092507
; CHECK-AIX-LABEL: buildi2:

0 commit comments

Comments
 (0)