Skip to content

Commit 8f6a1a0

Browse files
committed
[GISel][AArch64] Combine G_BUILD_VECTOR(G_UNMERGE) with undef elements
This extends the existing legalization combine, which folds a G_BUILD_VECTOR whose sources all come from the same G_UNMERGE, so that it also handles cases where some of the lanes are undef. This comes up in the legalization of <3 x ..> vectors on AArch64, where they are padded with undef lanes. There are two choices for what to create. This patch simply removes the G_BUILD_VECTOR/G_UNMERGE pair, losing the information about which lanes are undef. The alternative would be to generate an identity G_SHUFFLE_VECTOR with the undef lanes marked as undef. I think both have advantages and disadvantages. Differential Revision: https://reviews.llvm.org/D158063
1 parent 4a68c27 commit 8f6a1a0

25 files changed

+866
-1765
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -900,17 +900,21 @@ class LegalizationArtifactCombiner {
900900
// same sequence. Search for elements using findValueFromDefImpl.
901901
bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
902902
GUnmerge *Unmerge, unsigned UnmergeIdxStart,
903-
unsigned NumElts, unsigned EltSize) {
903+
unsigned NumElts, unsigned EltSize,
904+
bool AllowUndef) {
904905
assert(MergeStartIdx + NumElts <= MI.getNumSources());
905906
for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
906907
unsigned EltUnmergeIdx;
907908
GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
908909
MI.getSourceReg(i), EltSize, EltUnmergeIdx);
909910
// Check if source i comes from the same Unmerge.
910-
if (!EltUnmerge || EltUnmerge != Unmerge)
911-
return false;
912-
// Check that source i's def has same index in sequence in Unmerge.
913-
if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
911+
if (EltUnmerge == Unmerge) {
912+
// Check that source i's def has same index in sequence in Unmerge.
913+
if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
914+
return false;
915+
} else if (!AllowUndef ||
916+
MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() !=
917+
TargetOpcode::G_IMPLICIT_DEF)
914918
return false;
915919
}
916920
return true;
@@ -944,8 +948,10 @@ class LegalizationArtifactCombiner {
944948
//
945949
// %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
946950
if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
947-
if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize))
951+
if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize,
952+
/*AllowUndef=*/DstTy.isVector()))
948953
return false;
954+
949955
replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
950956
DeadInsts.push_back(&MI);
951957
return true;
@@ -965,7 +971,7 @@ class LegalizationArtifactCombiner {
965971
(Elt0UnmergeIdx % NumMIElts == 0) &&
966972
getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
967973
if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
968-
EltSize))
974+
EltSize, false))
969975
return false;
970976
MIB.setInstrAndDebugLoc(MI);
971977
auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
@@ -998,7 +1004,8 @@ class LegalizationArtifactCombiner {
9981004
if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
9991005
(EltUnmergeIdx != 0))
10001006
return false;
1001-
if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
1007+
if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize,
1008+
false))
10021009
return false;
10031010
ConcatSources.push_back(UnmergeI->getSourceReg());
10041011
}

llvm/test/CodeGen/AArch64/fabs.ll

Lines changed: 12 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -115,25 +115,10 @@ entry:
115115
}
116116

117117
define <3 x float> @fabs_v3f32(<3 x float> %a) {
118-
; CHECK-SD-LABEL: fabs_v3f32:
119-
; CHECK-SD: // %bb.0: // %entry
120-
; CHECK-SD-NEXT: fabs v0.4s, v0.4s
121-
; CHECK-SD-NEXT: ret
122-
;
123-
; CHECK-GI-LABEL: fabs_v3f32:
124-
; CHECK-GI: // %bb.0: // %entry
125-
; CHECK-GI-NEXT: mov s1, v0.s[1]
126-
; CHECK-GI-NEXT: mov s2, v0.s[2]
127-
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
128-
; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
129-
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
130-
; CHECK-GI-NEXT: fabs v0.4s, v0.4s
131-
; CHECK-GI-NEXT: mov s1, v0.s[1]
132-
; CHECK-GI-NEXT: mov s2, v0.s[2]
133-
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
134-
; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
135-
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
136-
; CHECK-GI-NEXT: ret
118+
; CHECK-LABEL: fabs_v3f32:
119+
; CHECK: // %bb.0: // %entry
120+
; CHECK-NEXT: fabs v0.4s, v0.4s
121+
; CHECK-NEXT: ret
137122
entry:
138123
%c = call <3 x float> @llvm.fabs.v3f32(<3 x float> %a)
139124
ret <3 x float> %c
@@ -213,29 +198,19 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
213198
; CHECK-GI-NOFP16: // %bb.0: // %entry
214199
; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[4]
215200
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
216-
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
201+
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v0.4h
202+
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[6]
217203
; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v2.h[0]
218-
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v3.h[0]
204+
; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v3.4s
205+
; CHECK-GI-NOFP16-NEXT: mov v1.h[2], v0.h[0]
206+
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
219207
; CHECK-GI-NOFP16-NEXT: mov v1.h[3], v0.h[0]
220-
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
221-
; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
222-
; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
223-
; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
224-
; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
225-
; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
226-
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
227-
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
228-
; CHECK-GI-NOFP16-NEXT: fabs v0.4s, v0.4s
229-
; CHECK-GI-NOFP16-NEXT: mov s2, v1.s[1]
230-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
231-
; CHECK-GI-NOFP16-NEXT: mov s3, v1.s[2]
232-
; CHECK-GI-NOFP16-NEXT: mov v1.s[1], v2.s[0]
233208
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
234-
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
235-
; CHECK-GI-NOFP16-NEXT: mov v1.s[2], v3.s[0]
236209
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
210+
; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[3]
211+
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
237212
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
238-
; CHECK-GI-NOFP16-NEXT: mov v1.s[3], v0.s[0]
213+
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
239214
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v3.h[0]
240215
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
241216
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v4.h[0]
@@ -249,33 +224,7 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
249224
;
250225
; CHECK-GI-FP16-LABEL: fabs_v7f16:
251226
; CHECK-GI-FP16: // %bb.0: // %entry
252-
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
253-
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
254-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
255-
; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
256-
; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
257-
; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
258-
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
259-
; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
260-
; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
261-
; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
262-
; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
263-
; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
264-
; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
265227
; CHECK-GI-FP16-NEXT: fabs v0.8h, v0.8h
266-
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
267-
; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
268-
; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
269-
; CHECK-GI-FP16-NEXT: mov h4, v0.h[4]
270-
; CHECK-GI-FP16-NEXT: mov h5, v0.h[5]
271-
; CHECK-GI-FP16-NEXT: mov h6, v0.h[6]
272-
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
273-
; CHECK-GI-FP16-NEXT: mov v0.h[2], v2.h[0]
274-
; CHECK-GI-FP16-NEXT: mov v0.h[3], v3.h[0]
275-
; CHECK-GI-FP16-NEXT: mov v0.h[4], v4.h[0]
276-
; CHECK-GI-FP16-NEXT: mov v0.h[5], v5.h[0]
277-
; CHECK-GI-FP16-NEXT: mov v0.h[6], v6.h[0]
278-
; CHECK-GI-FP16-NEXT: mov v0.h[7], v0.h[0]
279228
; CHECK-GI-FP16-NEXT: ret
280229
entry:
281230
%c = call <7 x half> @llvm.fabs.v7f16(<7 x half> %a)

0 commit comments

Comments
 (0)