@@ -15687,16 +15687,20 @@ static SDValue isScalarToVec(SDValue Op) {
15687
15687
// On little endian, that's just the corresponding element in the other
15688
15688
// half of the vector. On big endian, it is in the same half but right
15689
15689
// justified rather than left justified in that half.
15690
- static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15691
- int LHSMaxIdx, int RHSMinIdx,
15692
- int RHSMaxIdx, int HalfVec,
15693
- unsigned ValidLaneWidth,
15694
- const PPCSubtarget &Subtarget) {
15695
- for (int i = 0, e = ShuffV.size(); i < e; i++) {
15696
- int Idx = ShuffV[i];
15697
- if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15698
- ShuffV[i] +=
15699
- Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15690
+ static void fixupShuffleMaskForPermutedSToV(
15691
+ SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
15692
+ int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
15693
+ unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
// Amount added to a mask entry that falls in the corresponding operand's
// range: HalfVec on little endian, HalfVec minus that operand's number of
// valid elements on big endian. Each operand gets its own fixup because the
// two valid-element counts are passed in separately.
15694
+ int LHSEltFixup =
15695
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
15696
+ int RHSEltFixup =
15697
+ Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
15698
+ for (int I = 0, E = ShuffV.size(); I < E; ++I) {
15699
+ int Idx = ShuffV[I];
// Both ranges are inclusive. A range whose last element is below its first
// element matches nothing, so entries for that operand are left unchanged.
// Entries outside both ranges (including negative ones) are not modified.
15700
+ if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
15701
+ ShuffV[I] += LHSEltFixup;
15702
+ else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
15703
+ ShuffV[I] += RHSEltFixup;
15700
15704
}
15701
15705
}
15702
15706
@@ -15735,6 +15739,51 @@ static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15735
15739
OrigSToV.getOperand(0));
15736
15740
}
15737
15741
15742
// Returns false if any defined index in ShuffV falls outside the bounds this
// function accepts for the two shuffle operands, and true otherwise.
//   ShuffV                - shuffle mask to inspect; negative entries are
//                           skipped.
//   HalfVec               - split point used by both checks; the caller in
//                           this change passes half the number of mask
//                           elements.
//   LHSLastElementDefined - upper bound for the first-operand check; a
//                           negative value disables that check.
//   RHSLastElementDefined - upper bound for the second-operand check; a
//                           negative value disables that check.
// NOTE(review): the first-operand check only looks at indices below HalfVec,
// and the second-operand bound is HalfVec + RHSLastElementDefined even though
// the caller in this change passes a value that already starts from the mask
// size. Confirm that both bounds are intended.
+ static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
15743
+ int HalfVec, int LHSLastElementDefined,
15744
+ int RHSLastElementDefined) {
15745
+ for (int Index : ShuffV) {
15746
+ if (Index < 0) // Skip explicitly undefined mask indices.
15747
+ continue;
15748
+ // Handle first input vector of the vector_shuffle: reject an index that is below HalfVec but above the last defined LHS element.
15749
+ if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
15750
+ (Index > LHSLastElementDefined))
15751
+ return false;
15752
+ // Handle second input vector of the vector_shuffle: reject an index above HalfVec + RHSLastElementDefined.
15753
+ if ((RHSLastElementDefined >= 0) &&
15754
+ (Index > HalfVec + RHSLastElementDefined))
15755
+ return false;
15756
+ }
15757
+ return true;
15758
+ }
15759
+
15760
// Builds the permuted form of a scalar_to_vector shuffle operand and computes
// the values later used to fix up the shuffle mask for that operand.
//   ScalarSize      - scalar size passed by the caller (the caller in this
//                     change passes the scalar_to_vector's scalar size in
//                     bits).
//   ShuffleEltWidth - element width of the shuffle result, in the same units
//                     as ScalarSize.
//   NumValidElts    - [out] ScalarSize divided by the scalar size of
//                     VecShuffOperand's type.
//   FirstElt        - mask index of this operand's first element.
//   LastElt         - [out] mask index of the last element of this operand
//                     that is covered by the scalar.
//   VecShuffOperand - the shuffle operand; only its value type is used here.
//   SToVNode        - the scalar_to_vector node handed to getSToVPermuted.
// Returns the permuted node, bitcast to VecShuffOperand's type if the types
// differ.
+ static SDValue generateSToVPermutedForVecShuffle(
15761
+ int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
15762
+ int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
15763
+ SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
15764
+ EVT VecShuffOperandType = VecShuffOperand.getValueType();
15765
+ // Set up the values for the shuffle vector fixup.
15766
+ NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
15767
+ // The last element depends on whether the input comes from the LHS or RHS.
15768
+ //
15769
+ // For example:
15770
+ // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
15771
+ //
15772
+ // For the LHS: The last element that comes from the LHS is actually 0, not 3
15773
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15774
+ // For the RHS: The last element that comes from the RHS is actually 5, not 7
15775
+ // because elements 1 and higher of a scalar_to_vector are undefined.
15776
+ // It is also not 4 because the original scalar_to_vector is wider and
15777
+ // actually contains two i32 elements.
15778
+ LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
15779
+ ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
15780
+ : FirstElt;
15781
+ SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
// Keep the operand type the shuffle expects: bitcast only when the permuted
// node's type differs from the original operand's type.
15782
+ if (SToVPermuted.getValueType() != VecShuffOperandType)
15783
+ SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
15784
+ return SToVPermuted;
15785
+ }
15786
+
15738
15787
// On little endian subtargets, combine shuffles such as:
15739
15788
// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15740
15789
// into:
@@ -15782,71 +15831,64 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15782
15831
SDValue SToVLHS = isScalarToVec(LHS);
15783
15832
SDValue SToVRHS = isScalarToVec(RHS);
15784
15833
if (SToVLHS || SToVRHS) {
15785
- // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15786
- // same type and have differing element sizes, then do not perform
15787
- // the following transformation. The current transformation for
15788
- // SCALAR_TO_VECTOR assumes that both input vectors have the same
15789
- // element size. This will be updated in the future to account for
15790
- // differing sizes of the LHS and RHS.
15791
- if (SToVLHS && SToVRHS &&
15792
- (SToVLHS.getValueType().getScalarSizeInBits() !=
15793
- SToVRHS.getValueType().getScalarSizeInBits()))
15794
- return Res;
15795
-
15796
- int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15797
- : SToVRHS.getValueType().getVectorNumElements();
15798
- int NumEltsOut = ShuffV.size();
15834
+ EVT VT = SVN->getValueType(0);
15835
+ uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
15836
+ int ShuffleNumElts = ShuffV.size();
15837
+ int HalfVec = ShuffleNumElts / 2;
15799
15838
// The width of the "valid lane" (i.e. the lane that contains the value that
15800
15839
// is vectorized) needs to be expressed in terms of the number of elements
15801
15840
// of the shuffle. It is thereby the ratio of the values before and after
15802
- // any bitcast.
15803
- unsigned ValidLaneWidth =
15804
- SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15805
- LHS.getValueType().getScalarSizeInBits()
15806
- : SToVRHS.getValueType().getScalarSizeInBits() /
15807
- RHS.getValueType().getScalarSizeInBits();
15841
+ // any bitcast, which will be set later on if the LHS or RHS are
15842
+ // SCALAR_TO_VECTOR nodes.
15843
+ unsigned LHSNumValidElts = HalfVec;
15844
+ unsigned RHSNumValidElts = HalfVec;
15808
15845
15809
15846
// Initially assume that neither input is permuted. These will be adjusted
15810
- // accordingly if either input is.
15811
- int LHSMaxIdx = -1;
15812
- int RHSMinIdx = -1;
15813
- int RHSMaxIdx = -1;
15814
- int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15847
+ // accordingly if either input is. Note that -1 means that all elements
15848
+ // are undefined.
15849
+ int LHSFirstElt = 0;
15850
+ int RHSFirstElt = ShuffleNumElts;
15851
+ int LHSLastElt = -1;
15852
+ int RHSLastElt = -1;
15815
15853
15816
15854
// Get the permuted scalar to vector nodes for the source(s) that come from
15817
15855
// ISD::SCALAR_TO_VECTOR.
15818
15856
// On big endian systems, this only makes sense for element sizes smaller
15819
15857
// than 64 bits since for 64-bit elements, all instructions already put
15820
15858
// the value into element zero. Since scalar size of LHS and RHS may differ
15821
15859
// after isScalarToVec, this should be checked using their own sizes.
15860
+ int LHSScalarSize = 0;
15861
+ int RHSScalarSize = 0;
15822
15862
if (SToVLHS) {
15823
- if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15863
+ LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
15864
+ if (!IsLittleEndian && LHSScalarSize >= 64)
15824
15865
return Res;
15825
- // Set up the values for the shuffle vector fixup.
15826
- LHSMaxIdx = NumEltsOut / NumEltsIn;
15827
- SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15828
- if (SToVLHS.getValueType() != LHS.getValueType())
15829
- SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15830
- LHS = SToVLHS;
15831
15866
}
15832
15867
if (SToVRHS) {
15833
- if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15868
+ RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
15869
+ if (!IsLittleEndian && RHSScalarSize >= 64)
15834
15870
return Res;
15835
- RHSMinIdx = NumEltsOut;
15836
- RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15837
- SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15838
- if (SToVRHS.getValueType() != RHS.getValueType())
15839
- SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15840
- RHS = SToVRHS;
15841
15871
}
15872
+ if (LHSScalarSize != 0)
15873
+ LHS = generateSToVPermutedForVecShuffle(
15874
+ LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
15875
+ LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
15876
+ if (RHSScalarSize != 0)
15877
+ RHS = generateSToVPermutedForVecShuffle(
15878
+ RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
15879
+ RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
15880
+
15881
+ if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
15882
+ return Res;
15842
15883
15843
15884
// Fix up the shuffle mask to reflect where the desired element actually is.
15844
15885
// The minimum and maximum indices that correspond to element zero for both
15845
15886
// the LHS and RHS are computed and will control which shuffle mask entries
15846
15887
// are to be changed. For example, if the RHS is permuted, any shuffle mask
15847
- // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15848
- fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15849
- HalfVec, ValidLaneWidth, Subtarget);
15888
+ // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
15889
+ fixupShuffleMaskForPermutedSToV(
15890
+ ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
15891
+ LHSNumValidElts, RHSNumValidElts, Subtarget);
15850
15892
Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15851
15893
15852
15894
// We may have simplified away the shuffle. We won't be able to do anything
0 commit comments