Skip to content

Commit a344db7

Browse files
authored
[AArch64][GlobalISel] Legalize G_SHUFFLE_VECTOR for Odd-Sized Vectors (#83038)
Legalize G_SHUFFLE_VECTOR for vectors with i8 and i16 elements that are smaller or larger than the legal vector types. Vectors with elements smaller than i8 are widened to i8 elements.
1 parent 5a0bd2a commit a344db7

File tree

5 files changed

+212
-108
lines changed

5 files changed

+212
-108
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,18 @@ class LegalizeRuleSet {
908908
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
909909
}
910910

911+
/// Widen the scalar or vector element type to the next power of two that is
912+
/// at least MinSize. No effect if the size is a power of two >= MinSize.
913+
LegalizeRuleSet &widenScalarOrEltToNextPow2OrMinSize(unsigned TypeIdx,
914+
unsigned MinSize = 0) {
915+
using namespace LegalityPredicates;
916+
return actionIf(
917+
LegalizeAction::WidenScalar,
918+
any(scalarOrEltNarrowerThan(TypeIdx, MinSize),
919+
scalarOrEltSizeNotPow2(typeIdx(TypeIdx))),
920+
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
921+
}
922+
911923
LegalizeRuleSet &narrowScalar(unsigned TypeIdx, LegalizeMutation Mutation) {
912924
using namespace LegalityPredicates;
913925
return actionIf(LegalizeAction::NarrowScalar, isScalar(typeIdx(TypeIdx)),

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2495,6 +2495,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
24952495
case TargetOpcode::G_OR:
24962496
case TargetOpcode::G_XOR:
24972497
case TargetOpcode::G_SUB:
2498+
case TargetOpcode::G_SHUFFLE_VECTOR:
24982499
// Perform operation at larger width (any extension is fine here, high bits
24992500
// don't affect the result) and then truncate the result back to the
25002501
// original type.

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
956956
},
957957
changeTo(1, 0))
958958
.moreElementsToNextPow2(0)
959+
.widenScalarOrEltToNextPow2OrMinSize(0, 8)
960+
.clampNumElements(0, v8s8, v16s8)
961+
.clampNumElements(0, v4s16, v8s16)
959962
.clampNumElements(0, v4s32, v4s32)
960963
.clampNumElements(0, v2s64, v2s64)
961964
.moreElementsIf(

llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -287,39 +287,47 @@ body: |
287287
; CHECK-NEXT: %q0:_(<4 x s32>) = COPY $q0
288288
; CHECK-NEXT: %q1:_(<4 x s32>) = COPY $q1
289289
; CHECK-NEXT: %q2:_(<4 x s32>) = COPY $q2
290-
; CHECK-NEXT: %vec_cond0:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
291-
; CHECK-NEXT: %vec_cond1:_(<4 x s1>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
290+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q1
291+
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), %q0(<4 x s32>), %q2
292292
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4100
293-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
294-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
295-
; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %w0(s32), [[C]]
296-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT %cmp(s1)
297-
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ZEXT]], 1
298-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[SEXT_INREG]](s32)
299-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF
300-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
301-
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s1>) = G_INSERT_VECTOR_ELT [[DEF]], [[TRUNC]](s1), [[C2]](s64)
302-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s1>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s1>), [[DEF]], shufflemask(0, 0, 0, 0)
303-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
304-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[C3]](s8)
305-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s1>) = G_BUILD_VECTOR [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1), [[TRUNC1]](s1)
306-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
307-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[BUILD_VECTOR1]](<4 x s1>)
308-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[ANYEXT1]]
309-
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[XOR]](<4 x s16>)
310-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond0(<4 x s1>)
311-
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[SHUF]](<4 x s1>)
312-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT2]], [[ANYEXT3]]
313-
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND]](<4 x s16>)
314-
; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT %vec_cond1(<4 x s1>)
315-
; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC2]](<4 x s1>)
316-
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[ANYEXT4]], [[ANYEXT5]]
317-
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[AND1]](<4 x s16>)
318-
; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC3]](<4 x s1>)
319-
; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[TRUNC4]](<4 x s1>)
320-
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ANYEXT6]], [[ANYEXT7]]
321-
; CHECK-NEXT: %select:_(<4 x s1>) = G_TRUNC [[OR]](<4 x s16>)
322-
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_ZEXT %select(<4 x s1>)
293+
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]]
294+
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1
295+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
296+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
297+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
298+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
299+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
300+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF1]](s16)
301+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
302+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
303+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[TRUNC]](s16), [[C1]](s64)
304+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
305+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
306+
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
307+
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
308+
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
309+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
310+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
311+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
312+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<8 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
313+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s8>), [[UV5:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
314+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
315+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
316+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
317+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
318+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY3]](s16), [[COPY4]](s16), [[COPY5]](s16), [[C2]](s16)
319+
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
320+
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[BUILD_VECTOR3]]
321+
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
322+
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
323+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[ANYEXT1]]
324+
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
325+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC6]], [[XOR]]
326+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
327+
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
328+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
329+
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
330+
; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT2]], [[BUILD_VECTOR4]]
323331
; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
324332
; CHECK-NEXT: RET_ReallyLR implicit $q0
325333
%w0:_(s32) = COPY $w0

llvm/test/CodeGen/AArch64/shufflevector.ll

Lines changed: 156 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,7 @@
33
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4i8
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v32i8
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i16
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v16i16
106
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4i8_zeroes
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v32i8_zeroes
13-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i16_zeroes
14-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v16i16_zeroes
157
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8
168
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8_zeroes
179

@@ -205,68 +197,142 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
205197
}
206198

207199
define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
208-
; CHECK-LABEL: shufflevector_v4i8:
209-
; CHECK: // %bb.0:
210-
; CHECK-NEXT: sub sp, sp, #16
211-
; CHECK-NEXT: .cfi_def_cfa_offset 16
212-
; CHECK-NEXT: ext v0.8b, v1.8b, v0.8b, #6
213-
; CHECK-NEXT: zip1 v1.4h, v1.4h, v0.4h
214-
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
215-
; CHECK-NEXT: xtn v0.8b, v0.8h
216-
; CHECK-NEXT: fmov w0, s0
217-
; CHECK-NEXT: add sp, sp, #16
218-
; CHECK-NEXT: ret
200+
; CHECK-SD-LABEL: shufflevector_v4i8:
201+
; CHECK-SD: // %bb.0:
202+
; CHECK-SD-NEXT: sub sp, sp, #16
203+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
204+
; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
205+
; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
206+
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
207+
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
208+
; CHECK-SD-NEXT: fmov w0, s0
209+
; CHECK-SD-NEXT: add sp, sp, #16
210+
; CHECK-SD-NEXT: ret
211+
;
212+
; CHECK-GI-LABEL: shufflevector_v4i8:
213+
; CHECK-GI: // %bb.0:
214+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
215+
; CHECK-GI-NEXT: mov h2, v0.h[1]
216+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
217+
; CHECK-GI-NEXT: mov h3, v1.h[1]
218+
; CHECK-GI-NEXT: adrp x8, .LCPI15_0
219+
; CHECK-GI-NEXT: mov h4, v0.h[2]
220+
; CHECK-GI-NEXT: mov h5, v0.h[3]
221+
; CHECK-GI-NEXT: mov h6, v1.h[3]
222+
; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
223+
; CHECK-GI-NEXT: mov h2, v1.h[2]
224+
; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
225+
; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
226+
; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
227+
; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
228+
; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
229+
; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
230+
; CHECK-GI-NEXT: mov v1.b[4], v0.b[0]
231+
; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
232+
; CHECK-GI-NEXT: mov v1.b[5], v0.b[0]
233+
; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
234+
; CHECK-GI-NEXT: mov v1.b[6], v0.b[0]
235+
; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
236+
; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
237+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
238+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
239+
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
240+
; CHECK-GI-NEXT: fmov w0, s0
241+
; CHECK-GI-NEXT: ret
219242
%c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 7>
220243
%d = bitcast <4 x i8> %c to i32
221244
ret i32 %d
222245
}
223246

224247
define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
225-
; CHECK-LABEL: shufflevector_v32i8:
226-
; CHECK: // %bb.0:
227-
; CHECK-NEXT: // kill: def $q2 killed $q2 def $q1_q2
228-
; CHECK-NEXT: adrp x8, .LCPI16_0
229-
; CHECK-NEXT: adrp x9, .LCPI16_1
230-
; CHECK-NEXT: mov v1.16b, v0.16b
231-
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
232-
; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
233-
; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
234-
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
235-
; CHECK-NEXT: ret
248+
; CHECK-SD-LABEL: shufflevector_v32i8:
249+
; CHECK-SD: // %bb.0:
250+
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
251+
; CHECK-SD-NEXT: adrp x8, .LCPI16_0
252+
; CHECK-SD-NEXT: adrp x9, .LCPI16_1
253+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
254+
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
255+
; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
256+
; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
257+
; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
258+
; CHECK-SD-NEXT: ret
259+
;
260+
; CHECK-GI-LABEL: shufflevector_v32i8:
261+
; CHECK-GI: // %bb.0:
262+
; CHECK-GI-NEXT: mov v3.16b, v0.16b
263+
; CHECK-GI-NEXT: adrp x8, .LCPI16_1
264+
; CHECK-GI-NEXT: adrp x9, .LCPI16_0
265+
; CHECK-GI-NEXT: mov v4.16b, v2.16b
266+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
267+
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0]
268+
; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
269+
; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
270+
; CHECK-GI-NEXT: ret
236271
%c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
237272
ret <32 x i8> %c
238273
}
239274

240275
define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
241-
; CHECK-LABEL: shufflevector_v2i16:
242-
; CHECK: // %bb.0:
243-
; CHECK-NEXT: sub sp, sp, #16
244-
; CHECK-NEXT: .cfi_def_cfa_offset 16
245-
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
246-
; CHECK-NEXT: mov w8, v0.s[1]
247-
; CHECK-NEXT: fmov w9, s0
248-
; CHECK-NEXT: strh w9, [sp, #12]
249-
; CHECK-NEXT: strh w8, [sp, #14]
250-
; CHECK-NEXT: ldr w0, [sp, #12]
251-
; CHECK-NEXT: add sp, sp, #16
252-
; CHECK-NEXT: ret
276+
; CHECK-SD-LABEL: shufflevector_v2i16:
277+
; CHECK-SD: // %bb.0:
278+
; CHECK-SD-NEXT: sub sp, sp, #16
279+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
280+
; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
281+
; CHECK-SD-NEXT: mov w8, v0.s[1]
282+
; CHECK-SD-NEXT: fmov w9, s0
283+
; CHECK-SD-NEXT: strh w9, [sp, #12]
284+
; CHECK-SD-NEXT: strh w8, [sp, #14]
285+
; CHECK-SD-NEXT: ldr w0, [sp, #12]
286+
; CHECK-SD-NEXT: add sp, sp, #16
287+
; CHECK-SD-NEXT: ret
288+
;
289+
; CHECK-GI-LABEL: shufflevector_v2i16:
290+
; CHECK-GI: // %bb.0:
291+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
292+
; CHECK-GI-NEXT: mov s2, v0.s[1]
293+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
294+
; CHECK-GI-NEXT: mov s3, v1.s[1]
295+
; CHECK-GI-NEXT: adrp x8, .LCPI17_0
296+
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
297+
; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
298+
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
299+
; CHECK-GI-NEXT: mov v1.h[2], v0.h[0]
300+
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
301+
; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
302+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
303+
; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
304+
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
305+
; CHECK-GI-NEXT: fmov w0, s0
306+
; CHECK-GI-NEXT: ret
253307
%c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 1, i32 2>
254308
%d = bitcast <2 x i16> %c to i32
255309
ret i32 %d
256310
}
257311

258312
define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
259-
; CHECK-LABEL: shufflevector_v16i16:
260-
; CHECK: // %bb.0:
261-
; CHECK-NEXT: // kill: def $q2 killed $q2 def $q1_q2
262-
; CHECK-NEXT: adrp x8, .LCPI18_0
263-
; CHECK-NEXT: adrp x9, .LCPI18_1
264-
; CHECK-NEXT: mov v1.16b, v0.16b
265-
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
266-
; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
267-
; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
268-
; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
269-
; CHECK-NEXT: ret
313+
; CHECK-SD-LABEL: shufflevector_v16i16:
314+
; CHECK-SD: // %bb.0:
315+
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
316+
; CHECK-SD-NEXT: adrp x8, .LCPI18_0
317+
; CHECK-SD-NEXT: adrp x9, .LCPI18_1
318+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
319+
; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
320+
; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
321+
; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
322+
; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
323+
; CHECK-SD-NEXT: ret
324+
;
325+
; CHECK-GI-LABEL: shufflevector_v16i16:
326+
; CHECK-GI: // %bb.0:
327+
; CHECK-GI-NEXT: mov v3.16b, v0.16b
328+
; CHECK-GI-NEXT: adrp x8, .LCPI18_1
329+
; CHECK-GI-NEXT: adrp x9, .LCPI18_0
330+
; CHECK-GI-NEXT: mov v4.16b, v2.16b
331+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
332+
; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0]
333+
; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
334+
; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
335+
; CHECK-GI-NEXT: ret
270336
%c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
271337
ret <16 x i16> %c
272338
}
@@ -332,16 +398,23 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
332398
}
333399

334400
define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
335-
; CHECK-LABEL: shufflevector_v4i8_zeroes:
336-
; CHECK: // %bb.0:
337-
; CHECK-NEXT: sub sp, sp, #16
338-
; CHECK-NEXT: .cfi_def_cfa_offset 16
339-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
340-
; CHECK-NEXT: dup v0.4h, v0.h[0]
341-
; CHECK-NEXT: xtn v0.8b, v0.8h
342-
; CHECK-NEXT: fmov w0, s0
343-
; CHECK-NEXT: add sp, sp, #16
344-
; CHECK-NEXT: ret
401+
; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
402+
; CHECK-SD: // %bb.0:
403+
; CHECK-SD-NEXT: sub sp, sp, #16
404+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
405+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
406+
; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
407+
; CHECK-SD-NEXT: xtn v0.8b, v0.8h
408+
; CHECK-SD-NEXT: fmov w0, s0
409+
; CHECK-SD-NEXT: add sp, sp, #16
410+
; CHECK-SD-NEXT: ret
411+
;
412+
; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
413+
; CHECK-GI: // %bb.0:
414+
; CHECK-GI-NEXT: fmov w8, s0
415+
; CHECK-GI-NEXT: dup v0.8b, w8
416+
; CHECK-GI-NEXT: fmov w0, s0
417+
; CHECK-GI-NEXT: ret
345418
%c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
346419
%d = bitcast <4 x i8> %c to i32
347420
ret i32 %d
@@ -358,19 +431,26 @@ define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
358431
}
359432

360433
define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
361-
; CHECK-LABEL: shufflevector_v2i16_zeroes:
362-
; CHECK: // %bb.0:
363-
; CHECK-NEXT: sub sp, sp, #16
364-
; CHECK-NEXT: .cfi_def_cfa_offset 16
365-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
366-
; CHECK-NEXT: dup v1.2s, v0.s[0]
367-
; CHECK-NEXT: fmov w9, s0
368-
; CHECK-NEXT: strh w9, [sp, #12]
369-
; CHECK-NEXT: mov w8, v1.s[1]
370-
; CHECK-NEXT: strh w8, [sp, #14]
371-
; CHECK-NEXT: ldr w0, [sp, #12]
372-
; CHECK-NEXT: add sp, sp, #16
373-
; CHECK-NEXT: ret
434+
; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
435+
; CHECK-SD: // %bb.0:
436+
; CHECK-SD-NEXT: sub sp, sp, #16
437+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
438+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
439+
; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
440+
; CHECK-SD-NEXT: fmov w9, s0
441+
; CHECK-SD-NEXT: strh w9, [sp, #12]
442+
; CHECK-SD-NEXT: mov w8, v1.s[1]
443+
; CHECK-SD-NEXT: strh w8, [sp, #14]
444+
; CHECK-SD-NEXT: ldr w0, [sp, #12]
445+
; CHECK-SD-NEXT: add sp, sp, #16
446+
; CHECK-SD-NEXT: ret
447+
;
448+
; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
449+
; CHECK-GI: // %bb.0:
450+
; CHECK-GI-NEXT: fmov w8, s0
451+
; CHECK-GI-NEXT: dup v0.4h, w8
452+
; CHECK-GI-NEXT: fmov w0, s0
453+
; CHECK-GI-NEXT: ret
374454
%c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> <i32 0, i32 0>
375455
%d = bitcast <2 x i16> %c to i32
376456
ret i32 %d

0 commit comments

Comments
 (0)