@@ -143,15 +143,8 @@ define <4 x float> @extract_v4f32_nxv16f32_12(<vscale x 16 x float> %arg) {
143
143
define <2 x float > @extract_v2f32_nxv16f32_2 (<vscale x 16 x float > %arg ) {
144
144
; CHECK-LABEL: extract_v2f32_nxv16f32_2:
145
145
; CHECK: // %bb.0:
146
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
147
- ; CHECK-NEXT: addvl sp, sp, #-1
148
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
149
- ; CHECK-NEXT: .cfi_offset w29, -16
150
- ; CHECK-NEXT: ptrue p0.s
151
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
152
- ; CHECK-NEXT: ldr d0, [sp, #8]
153
- ; CHECK-NEXT: addvl sp, sp, #1
154
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
146
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
147
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
155
148
; CHECK-NEXT: ret
156
149
%ext = call <2 x float > @llvm.vector.extract.v2f32.nxv16f32 (<vscale x 16 x float > %arg , i64 2 )
157
150
ret <2 x float > %ext
@@ -274,15 +267,8 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
274
267
define <2 x i32 > @extract_v2i32_nxv16i32_2 (<vscale x 16 x i32 > %arg ) {
275
268
; CHECK-LABEL: extract_v2i32_nxv16i32_2:
276
269
; CHECK: // %bb.0:
277
- ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
278
- ; CHECK-NEXT: addvl sp, sp, #-1
279
- ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
280
- ; CHECK-NEXT: .cfi_offset w29, -16
281
- ; CHECK-NEXT: ptrue p0.s
282
- ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
283
- ; CHECK-NEXT: ldr d0, [sp, #8]
284
- ; CHECK-NEXT: addvl sp, sp, #1
285
- ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
270
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
271
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
286
272
; CHECK-NEXT: ret
287
273
%ext = call <2 x i32 > @llvm.vector.extract.v2i32.nxv16i32 (<vscale x 16 x i32 > %arg , i64 2 )
288
274
ret <2 x i32 > %ext
@@ -314,16 +300,9 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
314
300
; CHECK-NEXT: addvl sp, sp, #-1
315
301
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
316
302
; CHECK-NEXT: .cfi_offset w29, -16
317
- ; CHECK-NEXT: cntd x8
318
303
; CHECK-NEXT: ptrue p0.d
319
- ; CHECK-NEXT: addpl x9, sp, #6
320
- ; CHECK-NEXT: subs x8, x8, #4
321
- ; CHECK-NEXT: csel x8, xzr, x8, lo
322
- ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
323
- ; CHECK-NEXT: cmp x8, #0
324
- ; CHECK-NEXT: csel x8, x8, xzr, lo
325
- ; CHECK-NEXT: lsl x8, x8, #1
326
- ; CHECK-NEXT: ldr d0, [x9, x8]
304
+ ; CHECK-NEXT: st1h { z0.d }, p0, [sp]
305
+ ; CHECK-NEXT: ldr d0, [sp]
327
306
; CHECK-NEXT: addvl sp, sp, #1
328
307
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
329
308
; CHECK-NEXT: ret
@@ -338,17 +317,12 @@ define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
338
317
; CHECK-NEXT: addvl sp, sp, #-1
339
318
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
340
319
; CHECK-NEXT: .cfi_offset w29, -16
341
- ; CHECK-NEXT: cntd x8
342
- ; CHECK-NEXT: mov w9, #4 // =0x4
343
320
; CHECK-NEXT: ptrue p0.d
344
- ; CHECK-NEXT: subs x8, x8, #4
345
- ; CHECK-NEXT: csel x8, xzr, x8, lo
346
- ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
347
- ; CHECK-NEXT: cmp x8, #4
348
- ; CHECK-NEXT: csel x8, x8, x9, lo
349
- ; CHECK-NEXT: addpl x9, sp, #6
350
- ; CHECK-NEXT: lsl x8, x8, #1
351
- ; CHECK-NEXT: ldr d0, [x9, x8]
321
+ ; CHECK-NEXT: ptrue p1.h
322
+ ; CHECK-NEXT: st1h { z0.d }, p0, [sp]
323
+ ; CHECK-NEXT: ld1h { z0.h }, p1/z, [sp]
324
+ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
325
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
352
326
; CHECK-NEXT: addvl sp, sp, #1
353
327
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
354
328
; CHECK-NEXT: ret
0 commit comments