3
3
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4
4
5
5
; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2i1
6
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4i8
7
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v32i8
8
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i16
9
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v16i16
10
6
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i1_zeroes
11
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4i8_zeroes
12
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v32i8_zeroes
13
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2i16_zeroes
14
- ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v16i16_zeroes
15
7
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8
16
8
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v3i8_zeroes
17
9
@@ -205,68 +197,142 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
205
197
}
206
198
207
199
define i32 @shufflevector_v4i8 (<4 x i8 > %a , <4 x i8 > %b ){
208
- ; CHECK-LABEL: shufflevector_v4i8:
209
- ; CHECK: // %bb.0:
210
- ; CHECK-NEXT: sub sp, sp, #16
211
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
212
- ; CHECK-NEXT: ext v0.8b, v1.8b, v0.8b, #6
213
- ; CHECK-NEXT: zip1 v1.4h, v1.4h, v0.4h
214
- ; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
215
- ; CHECK-NEXT: xtn v0.8b, v0.8h
216
- ; CHECK-NEXT: fmov w0, s0
217
- ; CHECK-NEXT: add sp, sp, #16
218
- ; CHECK-NEXT: ret
200
+ ; CHECK-SD-LABEL: shufflevector_v4i8:
201
+ ; CHECK-SD: // %bb.0:
202
+ ; CHECK-SD-NEXT: sub sp, sp, #16
203
+ ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
204
+ ; CHECK-SD-NEXT: ext v0.8b, v1.8b, v0.8b, #6
205
+ ; CHECK-SD-NEXT: zip1 v1.4h, v1.4h, v0.4h
206
+ ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
207
+ ; CHECK-SD-NEXT: xtn v0.8b, v0.8h
208
+ ; CHECK-SD-NEXT: fmov w0, s0
209
+ ; CHECK-SD-NEXT: add sp, sp, #16
210
+ ; CHECK-SD-NEXT: ret
211
+ ;
212
+ ; CHECK-GI-LABEL: shufflevector_v4i8:
213
+ ; CHECK-GI: // %bb.0:
214
+ ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
215
+ ; CHECK-GI-NEXT: mov h2, v0.h[1]
216
+ ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
217
+ ; CHECK-GI-NEXT: mov h3, v1.h[1]
218
+ ; CHECK-GI-NEXT: adrp x8, .LCPI15_0
219
+ ; CHECK-GI-NEXT: mov h4, v0.h[2]
220
+ ; CHECK-GI-NEXT: mov h5, v0.h[3]
221
+ ; CHECK-GI-NEXT: mov h6, v1.h[3]
222
+ ; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
223
+ ; CHECK-GI-NEXT: mov h2, v1.h[2]
224
+ ; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
225
+ ; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
226
+ ; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
227
+ ; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
228
+ ; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
229
+ ; CHECK-GI-NEXT: mov v0.b[4], v0.b[0]
230
+ ; CHECK-GI-NEXT: mov v1.b[4], v0.b[0]
231
+ ; CHECK-GI-NEXT: mov v0.b[5], v0.b[0]
232
+ ; CHECK-GI-NEXT: mov v1.b[5], v0.b[0]
233
+ ; CHECK-GI-NEXT: mov v0.b[6], v0.b[0]
234
+ ; CHECK-GI-NEXT: mov v1.b[6], v0.b[0]
235
+ ; CHECK-GI-NEXT: mov v0.b[7], v0.b[0]
236
+ ; CHECK-GI-NEXT: mov v1.b[7], v0.b[0]
237
+ ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
238
+ ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
239
+ ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
240
+ ; CHECK-GI-NEXT: fmov w0, s0
241
+ ; CHECK-GI-NEXT: ret
219
242
%c = shufflevector <4 x i8 > %a , <4 x i8 > %b , <4 x i32 > <i32 1 , i32 2 , i32 4 , i32 7 >
220
243
%d = bitcast <4 x i8 > %c to i32
221
244
ret i32 %d
222
245
}
223
246
224
247
define <32 x i8 > @shufflevector_v32i8 (<32 x i8 > %a , <32 x i8 > %b ){
225
- ; CHECK-LABEL: shufflevector_v32i8:
226
- ; CHECK: // %bb.0:
227
- ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q1_q2
228
- ; CHECK-NEXT: adrp x8, .LCPI16_0
229
- ; CHECK-NEXT: adrp x9, .LCPI16_1
230
- ; CHECK-NEXT: mov v1.16b, v0.16b
231
- ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
232
- ; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
233
- ; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
234
- ; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
235
- ; CHECK-NEXT: ret
248
+ ; CHECK-SD-LABEL: shufflevector_v32i8:
249
+ ; CHECK-SD: // %bb.0:
250
+ ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
251
+ ; CHECK-SD-NEXT: adrp x8, .LCPI16_0
252
+ ; CHECK-SD-NEXT: adrp x9, .LCPI16_1
253
+ ; CHECK-SD-NEXT: mov v1.16b, v0.16b
254
+ ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0]
255
+ ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1]
256
+ ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
257
+ ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
258
+ ; CHECK-SD-NEXT: ret
259
+ ;
260
+ ; CHECK-GI-LABEL: shufflevector_v32i8:
261
+ ; CHECK-GI: // %bb.0:
262
+ ; CHECK-GI-NEXT: mov v3.16b, v0.16b
263
+ ; CHECK-GI-NEXT: adrp x8, .LCPI16_1
264
+ ; CHECK-GI-NEXT: adrp x9, .LCPI16_0
265
+ ; CHECK-GI-NEXT: mov v4.16b, v2.16b
266
+ ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
267
+ ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0]
268
+ ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
269
+ ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
270
+ ; CHECK-GI-NEXT: ret
236
271
%c = shufflevector <32 x i8 > %a , <32 x i8 > %b , <32 x i32 > <i32 0 , i32 32 , i32 32 , i32 32 , i32 1 , i32 32 , i32 32 , i32 32 , i32 2 , i32 32 , i32 32 , i32 32 , i32 3 , i32 32 , i32 32 , i32 32 , i32 4 , i32 32 , i32 32 , i32 32 , i32 5 , i32 32 , i32 32 , i32 32 , i32 6 , i32 32 , i32 32 , i32 32 , i32 7 , i32 32 , i32 32 , i32 32 >
237
272
ret <32 x i8 > %c
238
273
}
239
274
240
275
define i32 @shufflevector_v2i16 (<2 x i16 > %a , <2 x i16 > %b ){
241
- ; CHECK-LABEL: shufflevector_v2i16:
242
- ; CHECK: // %bb.0:
243
- ; CHECK-NEXT: sub sp, sp, #16
244
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
245
- ; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
246
- ; CHECK-NEXT: mov w8, v0.s[1]
247
- ; CHECK-NEXT: fmov w9, s0
248
- ; CHECK-NEXT: strh w9, [sp, #12]
249
- ; CHECK-NEXT: strh w8, [sp, #14]
250
- ; CHECK-NEXT: ldr w0, [sp, #12]
251
- ; CHECK-NEXT: add sp, sp, #16
252
- ; CHECK-NEXT: ret
276
+ ; CHECK-SD-LABEL: shufflevector_v2i16:
277
+ ; CHECK-SD: // %bb.0:
278
+ ; CHECK-SD-NEXT: sub sp, sp, #16
279
+ ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
280
+ ; CHECK-SD-NEXT: ext v0.8b, v0.8b, v1.8b, #4
281
+ ; CHECK-SD-NEXT: mov w8, v0.s[1]
282
+ ; CHECK-SD-NEXT: fmov w9, s0
283
+ ; CHECK-SD-NEXT: strh w9, [sp, #12]
284
+ ; CHECK-SD-NEXT: strh w8, [sp, #14]
285
+ ; CHECK-SD-NEXT: ldr w0, [sp, #12]
286
+ ; CHECK-SD-NEXT: add sp, sp, #16
287
+ ; CHECK-SD-NEXT: ret
288
+ ;
289
+ ; CHECK-GI-LABEL: shufflevector_v2i16:
290
+ ; CHECK-GI: // %bb.0:
291
+ ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
292
+ ; CHECK-GI-NEXT: mov s2, v0.s[1]
293
+ ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
294
+ ; CHECK-GI-NEXT: mov s3, v1.s[1]
295
+ ; CHECK-GI-NEXT: adrp x8, .LCPI17_0
296
+ ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
297
+ ; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
298
+ ; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
299
+ ; CHECK-GI-NEXT: mov v1.h[2], v0.h[0]
300
+ ; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
301
+ ; CHECK-GI-NEXT: mov v1.h[3], v0.h[0]
302
+ ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
303
+ ; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0]
304
+ ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b
305
+ ; CHECK-GI-NEXT: fmov w0, s0
306
+ ; CHECK-GI-NEXT: ret
253
307
%c = shufflevector <2 x i16 > %a , <2 x i16 > %b , <2 x i32 > <i32 1 , i32 2 >
254
308
%d = bitcast <2 x i16 > %c to i32
255
309
ret i32 %d
256
310
}
257
311
258
312
define <16 x i16 > @shufflevector_v16i16 (<16 x i16 > %a , <16 x i16 > %b ){
259
- ; CHECK-LABEL: shufflevector_v16i16:
260
- ; CHECK: // %bb.0:
261
- ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q1_q2
262
- ; CHECK-NEXT: adrp x8, .LCPI18_0
263
- ; CHECK-NEXT: adrp x9, .LCPI18_1
264
- ; CHECK-NEXT: mov v1.16b, v0.16b
265
- ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
266
- ; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
267
- ; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
268
- ; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
269
- ; CHECK-NEXT: ret
313
+ ; CHECK-SD-LABEL: shufflevector_v16i16:
314
+ ; CHECK-SD: // %bb.0:
315
+ ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2
316
+ ; CHECK-SD-NEXT: adrp x8, .LCPI18_0
317
+ ; CHECK-SD-NEXT: adrp x9, .LCPI18_1
318
+ ; CHECK-SD-NEXT: mov v1.16b, v0.16b
319
+ ; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0]
320
+ ; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1]
321
+ ; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b
322
+ ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b
323
+ ; CHECK-SD-NEXT: ret
324
+ ;
325
+ ; CHECK-GI-LABEL: shufflevector_v16i16:
326
+ ; CHECK-GI: // %bb.0:
327
+ ; CHECK-GI-NEXT: mov v3.16b, v0.16b
328
+ ; CHECK-GI-NEXT: adrp x8, .LCPI18_1
329
+ ; CHECK-GI-NEXT: adrp x9, .LCPI18_0
330
+ ; CHECK-GI-NEXT: mov v4.16b, v2.16b
331
+ ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
332
+ ; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0]
333
+ ; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b
334
+ ; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
335
+ ; CHECK-GI-NEXT: ret
270
336
%c = shufflevector <16 x i16 > %a , <16 x i16 > %b , <16 x i32 > <i32 0 , i32 16 , i32 16 , i32 16 , i32 1 , i32 16 , i32 16 , i32 16 , i32 1 , i32 16 , i32 16 , i32 16 , i32 3 , i32 16 , i32 16 , i32 16 >
271
337
ret <16 x i16 > %c
272
338
}
@@ -332,16 +398,23 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
332
398
}
333
399
334
400
define i32 @shufflevector_v4i8_zeroes (<4 x i8 > %a , <4 x i8 > %b ){
335
- ; CHECK-LABEL: shufflevector_v4i8_zeroes:
336
- ; CHECK: // %bb.0:
337
- ; CHECK-NEXT: sub sp, sp, #16
338
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
339
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
340
- ; CHECK-NEXT: dup v0.4h, v0.h[0]
341
- ; CHECK-NEXT: xtn v0.8b, v0.8h
342
- ; CHECK-NEXT: fmov w0, s0
343
- ; CHECK-NEXT: add sp, sp, #16
344
- ; CHECK-NEXT: ret
401
+ ; CHECK-SD-LABEL: shufflevector_v4i8_zeroes:
402
+ ; CHECK-SD: // %bb.0:
403
+ ; CHECK-SD-NEXT: sub sp, sp, #16
404
+ ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
405
+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
406
+ ; CHECK-SD-NEXT: dup v0.4h, v0.h[0]
407
+ ; CHECK-SD-NEXT: xtn v0.8b, v0.8h
408
+ ; CHECK-SD-NEXT: fmov w0, s0
409
+ ; CHECK-SD-NEXT: add sp, sp, #16
410
+ ; CHECK-SD-NEXT: ret
411
+ ;
412
+ ; CHECK-GI-LABEL: shufflevector_v4i8_zeroes:
413
+ ; CHECK-GI: // %bb.0:
414
+ ; CHECK-GI-NEXT: fmov w8, s0
415
+ ; CHECK-GI-NEXT: dup v0.8b, w8
416
+ ; CHECK-GI-NEXT: fmov w0, s0
417
+ ; CHECK-GI-NEXT: ret
345
418
%c = shufflevector <4 x i8 > %a , <4 x i8 > %b , <4 x i32 > <i32 0 , i32 0 , i32 0 , i32 0 >
346
419
%d = bitcast <4 x i8 > %c to i32
347
420
ret i32 %d
@@ -358,19 +431,26 @@ define <32 x i8> @shufflevector_v32i8_zeroes(<32 x i8> %a, <32 x i8> %b){
358
431
}
359
432
360
433
define i32 @shufflevector_v2i16_zeroes (<2 x i16 > %a , <2 x i16 > %b ){
361
- ; CHECK-LABEL: shufflevector_v2i16_zeroes:
362
- ; CHECK: // %bb.0:
363
- ; CHECK-NEXT: sub sp, sp, #16
364
- ; CHECK-NEXT: .cfi_def_cfa_offset 16
365
- ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
366
- ; CHECK-NEXT: dup v1.2s, v0.s[0]
367
- ; CHECK-NEXT: fmov w9, s0
368
- ; CHECK-NEXT: strh w9, [sp, #12]
369
- ; CHECK-NEXT: mov w8, v1.s[1]
370
- ; CHECK-NEXT: strh w8, [sp, #14]
371
- ; CHECK-NEXT: ldr w0, [sp, #12]
372
- ; CHECK-NEXT: add sp, sp, #16
373
- ; CHECK-NEXT: ret
434
+ ; CHECK-SD-LABEL: shufflevector_v2i16_zeroes:
435
+ ; CHECK-SD: // %bb.0:
436
+ ; CHECK-SD-NEXT: sub sp, sp, #16
437
+ ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
438
+ ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
439
+ ; CHECK-SD-NEXT: dup v1.2s, v0.s[0]
440
+ ; CHECK-SD-NEXT: fmov w9, s0
441
+ ; CHECK-SD-NEXT: strh w9, [sp, #12]
442
+ ; CHECK-SD-NEXT: mov w8, v1.s[1]
443
+ ; CHECK-SD-NEXT: strh w8, [sp, #14]
444
+ ; CHECK-SD-NEXT: ldr w0, [sp, #12]
445
+ ; CHECK-SD-NEXT: add sp, sp, #16
446
+ ; CHECK-SD-NEXT: ret
447
+ ;
448
+ ; CHECK-GI-LABEL: shufflevector_v2i16_zeroes:
449
+ ; CHECK-GI: // %bb.0:
450
+ ; CHECK-GI-NEXT: fmov w8, s0
451
+ ; CHECK-GI-NEXT: dup v0.4h, w8
452
+ ; CHECK-GI-NEXT: fmov w0, s0
453
+ ; CHECK-GI-NEXT: ret
374
454
%c = shufflevector <2 x i16 > %a , <2 x i16 > %b , <2 x i32 > <i32 0 , i32 0 >
375
455
%d = bitcast <2 x i16 > %c to i32
376
456
ret i32 %d
0 commit comments