Skip to content

Commit 0cab13d

Browse files
author
git apple-llvm automerger
committed
Merge commit '6f0ae81512c1' from llvm.org/master into apple/master
2 parents 9f34823 + 6f0ae81 commit 0cab13d

File tree

4 files changed

+459
-0
lines changed

4 files changed

+459
-0
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,23 @@ class CombinerHelper {
112112
void applyCombineConcatVectors(MachineInstr &MI, bool IsUndef,
113113
const ArrayRef<Register> Ops);
114114

115+
/// Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
116+
/// Runs matchCombineShuffleVector and, if it succeeds,
/// applyCombineShuffleVector.
/// \returns true if \p MI changed.
117+
///
118+
/// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
119+
bool tryCombineShuffleVector(MachineInstr &MI);
120+
/// Check if the G_SHUFFLE_VECTOR \p MI can be replaced by a
121+
/// concat_vectors.
122+
/// \p Ops will contain the operands needed to produce the flattened
123+
/// concat_vectors.
/// \note When the match succeeds and some source-sized piece of the result
/// is entirely undef, an undef value of the source type is built right
/// before \p MI and used as the operand for every such piece.
/// \returns true if the shuffle mask describes a concatenation of whole
/// source vectors (and/or undef pieces), false otherwise.
124+
///
125+
/// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
126+
bool matchCombineShuffleVector(MachineInstr &MI,
127+
SmallVectorImpl<Register> &Ops);
128+
/// Replace \p MI with a concat_vectors with \p Ops.
/// The concat_vectors is inserted before \p MI and defines a new virtual
/// register cloned from the destination of \p MI. \p MI is then erased and
/// the old destination register is replaced with the new one.
129+
void applyCombineShuffleVector(MachineInstr &MI,
130+
const ArrayRef<Register> Ops);
131+
115132
/// Optimize memcpy intrinsics et al, e.g. constant len calls.
116133
/// \p MaxLen if non-zero specifies the max length of a mem libcall to inline.
117134
///

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,93 @@ void CombinerHelper::applyCombineConcatVectors(
173173
replaceRegWith(MRI, DstReg, NewDstReg);
174174
}
175175

176+
/// Attempt to rewrite the G_SHUFFLE_VECTOR \p MI as a concat_vectors.
/// \returns true if the rewrite happened, false otherwise.
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
  // Operands of the would-be concat_vectors, filled in by the matcher.
  SmallVector<Register, 4> ConcatOps;

  // Nothing to do unless the shuffle is really a concatenation.
  if (!matchCombineShuffleVector(MI, ConcatOps))
    return false;

  applyCombineShuffleVector(MI, ConcatOps);
  return true;
}
184+
185+
/// Check whether the G_SHUFFLE_VECTOR \p MI merely concatenates whole source
/// vectors (and/or fully-undef pieces of the source type).
///
/// On success, one register per source-sized piece of the result is appended
/// to \p Ops, in order. If at least one piece is entirely undef, a single
/// undef value of the source type is built right before \p MI and reused for
/// every such piece. On failure, \p Ops is not modified and nothing is built.
///
/// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
/// \returns true if \p MI can be replaced by a concat_vectors of \p Ops.
bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
                                               SmallVectorImpl<Register> &Ops) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
         "Invalid instruction kind");
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);

  // LLT::getNumElements() is only valid on vector types, and a
  // concat_vectors needs vector operands and a vector result. Bail out
  // instead of asserting if either type is not a vector.
  if (!DstType.isVector() || !SrcType.isVector())
    return false;

  unsigned DstNumElts = DstType.getNumElements();
  unsigned SrcNumElts = SrcType.getNumElements();

  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector into a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  if (DstNumElts < 2 * SrcNumElts)
    return false;

  // Check that the shuffle mask can be broken evenly between the
  // different sources.
  if (DstNumElts % SrcNumElts != 0)
    return false;

  // Mask length is a multiple of the source vector length.
  // Check if the shuffle is some kind of concatenation of the input
  // vectors.
  unsigned NumConcat = DstNumElts / SrcNumElts;
  // For each source-sized piece of the result: index of the source it is
  // taken from, or -1 while every lane seen so far in that piece is undef.
  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
  SmallVector<int, 8> Mask;
  ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask);
  for (unsigned i = 0; i != DstNumElts; ++i) {
    int Idx = Mask[i];
    // Undef value.
    if (Idx < 0)
      continue;

    // Idx is non-negative from here on, so do the lane arithmetic in
    // unsigned explicitly rather than through implicit conversions.
    unsigned EltIdx = static_cast<unsigned>(Idx);
    unsigned Piece = i / SrcNumElts;
    int SrcIdx = static_cast<int>(EltIdx / SrcNumElts);

    // Ensure the indices in each SrcType sized piece are sequential...
    if (EltIdx % SrcNumElts != i % SrcNumElts)
      return false;
    // ...and that the same source is used for the whole piece.
    if (ConcatSrcs[Piece] >= 0 && ConcatSrcs[Piece] != SrcIdx)
      return false;

    // Remember which source this index came from.
    ConcatSrcs[Piece] = SrcIdx;
  }

  // The shuffle is concatenating multiple vectors together.
  // Collect the different operands for that.
  // Nothing below can fail, so it is safe to start building instructions
  // and filling in Ops.
  Register UndefReg;
  Register Src2 = MI.getOperand(2).getReg();
  for (int Src : ConcatSrcs) {
    if (Src < 0) {
      // Lazily create one undef of the source type, shared by all the
      // pieces that are entirely undef.
      if (!UndefReg) {
        Builder.setInsertPt(*MI.getParent(), MI);
        UndefReg = Builder.buildUndef(SrcType).getReg(0);
      }
      Ops.push_back(UndefReg);
    } else if (Src == 0)
      Ops.push_back(Src1);
    else
      Ops.push_back(Src2);
  }
  return true;
}
250+
251+
/// Replace \p MI with a concat_vectors of \p Ops.
///
/// The new instruction defines a fresh clone of the destination register of
/// \p MI; \p MI is erased and the old destination is replaced with the clone.
void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
                                               const ArrayRef<Register> Ops) {
  // Emit the replacement right where the old instruction sits.
  Builder.setInsertPt(*MI.getParent(), MI);

  // Define a clone of the old destination with the concat_vectors.
  Register OldDst = MI.getOperand(0).getReg();
  Register NewDst = MRI.cloneVirtualRegister(OldDst);
  Builder.buildConcatVectors(NewDst, Ops);

  // Drop the old instruction, then switch over to the new register.
  MI.eraseFromParent();
  replaceRegWith(MRI, OldDst, NewDst);
}
262+
176263
namespace {
177264

178265
/// Select a preference between two uses. CurrentUse is the current preference

llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
6464
switch (MI.getOpcode()) {
6565
case TargetOpcode::G_CONCAT_VECTORS:
6666
return Helper.tryCombineConcatVectors(MI);
67+
case TargetOpcode::G_SHUFFLE_VECTOR:
68+
return Helper.tryCombineShuffleVector(MI);
6769
case TargetOpcode::G_LOAD:
6870
case TargetOpcode::G_SEXTLOAD:
6971
case TargetOpcode::G_ZEXTLOAD: {

0 commit comments

Comments
 (0)