Skip to content

Commit cebc960

Browse files
authored
[AArch64] Match ZIP and UZP starting from undef elements. (#89578)
When the first element of a zip/uzp mask is undef, the isZIPMask and isUZPMask functions have a 50% chance of picking the wrong "WhichResult", because they derive it from M[0] alone; as a result they fail to match a zip/uzp where they could. This patch changes the matching code to first look for the first non-undef element and use it to determine WhichResult.
1 parent c45fbfd commit cebc960

File tree

3 files changed

+49
-42
lines changed

3 files changed

+49
-42
lines changed

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6620,32 +6620,63 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
66206620
return (PFEntry >> 30) + 1;
66216621
}
66226622

6623-
inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6623+
/// Return true for zip1 or zip2 masks of the form:
6624+
/// <0, 8, 1, 9, 2, 10, 3, 11> or
6625+
/// <4, 12, 5, 13, 6, 14, 7, 15>
6626+
inline bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
66246627
unsigned NumElts = VT.getVectorNumElements();
66256628
if (NumElts % 2 != 0)
66266629
return false;
6627-
WhichResult = (M[0] == 0 ? 0 : 1);
6630+
// Check the first non-undef element for which half to use.
6631+
unsigned WhichResult = 2;
6632+
for (unsigned i = 0; i != NumElts / 2; i++) {
6633+
if (M[i * 2] >= 0) {
6634+
WhichResult = ((unsigned)M[i * 2] == i ? 0 : 1);
6635+
break;
6636+
} else if (M[i * 2 + 1] >= 0) {
6637+
WhichResult = ((unsigned)M[i * 2 + 1] == NumElts + i ? 0 : 1);
6638+
break;
6639+
}
6640+
}
6641+
if (WhichResult == 2)
6642+
return false;
6643+
6644+
// Check all elements match.
66286645
unsigned Idx = WhichResult * NumElts / 2;
66296646
for (unsigned i = 0; i != NumElts; i += 2) {
66306647
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
66316648
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
66326649
return false;
66336650
Idx += 1;
66346651
}
6635-
6652+
WhichResultOut = WhichResult;
66366653
return true;
66376654
}
66386655

6639-
inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6656+
/// Return true for uzp1 or uzp2 masks of the form:
6657+
/// <0, 2, 4, 6, 8, 10, 12, 14> or
6658+
/// <1, 3, 5, 7, 9, 11, 13, 15>
6659+
inline bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResultOut) {
66406660
unsigned NumElts = VT.getVectorNumElements();
6641-
WhichResult = (M[0] == 0 ? 0 : 1);
6661+
// Check the first non-undef element for which half to use.
6662+
unsigned WhichResult = 2;
6663+
for (unsigned i = 0; i != NumElts; i++) {
6664+
if (M[i] >= 0) {
6665+
WhichResult = ((unsigned)M[i] == i * 2 ? 0 : 1);
6666+
break;
6667+
}
6668+
}
6669+
if (WhichResult == 2)
6670+
return false;
6671+
6672+
// Check all elements match.
66426673
for (unsigned i = 0; i != NumElts; ++i) {
66436674
if (M[i] < 0)
66446675
continue; // ignore UNDEF indices
66456676
if ((unsigned)M[i] != 2 * i + WhichResult)
66466677
return false;
66476678
}
6648-
6679+
WhichResultOut = WhichResult;
66496680
return true;
66506681
}
66516682

llvm/test/CodeGen/AArch64/arm64-uzp.ll

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -110,13 +110,9 @@ define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind {
110110
define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
111111
; CHECK-LABEL: vuzpQi16_undef0:
112112
; CHECK: // %bb.0:
113-
; CHECK-NEXT: adrp x8, .LCPI8_0
114-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
115-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
116-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
117-
; CHECK-NEXT: uzp2.8h v3, v0, v1
118-
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
119-
; CHECK-NEXT: add.8h v0, v0, v3
113+
; CHECK-NEXT: uzp1.8h v2, v0, v1
114+
; CHECK-NEXT: uzp2.8h v0, v0, v1
115+
; CHECK-NEXT: add.8h v0, v2, v0
120116
; CHECK-NEXT: ret
121117
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
122118
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -127,13 +123,9 @@ define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind {
127123
define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
128124
; CHECK-LABEL: vuzpQi16_undef01:
129125
; CHECK: // %bb.0:
130-
; CHECK-NEXT: adrp x8, .LCPI9_0
131-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
132-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
133-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
134-
; CHECK-NEXT: uzp2.8h v3, v0, v1
135-
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
136-
; CHECK-NEXT: add.8h v0, v0, v3
126+
; CHECK-NEXT: uzp1.8h v2, v0, v1
127+
; CHECK-NEXT: uzp2.8h v0, v0, v1
128+
; CHECK-NEXT: add.8h v0, v2, v0
137129
; CHECK-NEXT: ret
138130
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
139131
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -144,13 +136,9 @@ define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind {
144136
define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind {
145137
; CHECK-LABEL: vuzpQi16_undef012:
146138
; CHECK: // %bb.0:
147-
; CHECK-NEXT: adrp x8, .LCPI10_0
148-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
149-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0]
150-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
151-
; CHECK-NEXT: uzp2.8h v3, v0, v1
152-
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
153-
; CHECK-NEXT: add.8h v0, v0, v3
139+
; CHECK-NEXT: uzp1.8h v2, v0, v1
140+
; CHECK-NEXT: uzp2.8h v0, v0, v1
141+
; CHECK-NEXT: add.8h v0, v2, v0
154142
; CHECK-NEXT: ret
155143
%tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 6, i32 8, i32 10, i32 12, i32 14>
156144
%tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15>

llvm/test/CodeGen/AArch64/arm64-zip.ll

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,7 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind {
142142
define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
143143
; CHECK-LABEL: vzip1_undef_01:
144144
; CHECK: // %bb.0:
145-
; CHECK-NEXT: adrp x8, .LCPI8_0
146-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
147-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0]
148-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
149-
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
145+
; CHECK-NEXT: zip1.8h v0, v0, v1
150146
; CHECK-NEXT: ret
151147
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
152148
ret <8 x i16> %s
@@ -155,11 +151,7 @@ define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind {
155151
define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind {
156152
; CHECK-LABEL: vzip1_undef_0:
157153
; CHECK: // %bb.0:
158-
; CHECK-NEXT: adrp x8, .LCPI9_0
159-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
160-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0]
161-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
162-
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
154+
; CHECK-NEXT: zip1.8h v0, v0, v1
163155
; CHECK-NEXT: ret
164156
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
165157
ret <8 x i16> %s
@@ -177,11 +169,7 @@ define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind {
177169
define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
178170
; CHECK-LABEL: vzip1_undef_012:
179171
; CHECK: // %bb.0:
180-
; CHECK-NEXT: adrp x8, .LCPI11_0
181-
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
182-
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
183-
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
184-
; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2
172+
; CHECK-NEXT: zip1.8h v0, v0, v1
185173
; CHECK-NEXT: ret
186174
%s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 9, i32 2, i32 10, i32 3, i32 11>
187175
ret <8 x i16> %s

0 commit comments

Comments
 (0)