1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
- ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONSTREAMING
3
- ; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
4
- ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT
2
+ ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 | FileCheck %s
3
+ ; RUN: llc < %s -verify-machineinstrs -mattr=+sme -global-isel=0 -force-streaming | FileCheck %s
4
+ ; RUN: llc < %s -verify-machineinstrs -mattr=+sve -global-isel=0 -force-streaming-compatible | FileCheck %s
5
5
6
6
target triple = "aarch64-unknown-linux-gnu"
7
7
@@ -106,18 +106,11 @@ entry:
106
106
}
107
107
108
108
define void @test_str_lane_s8 (ptr %a , <vscale x 16 x i8 > %b ) {
109
- ; CHECK-NONSTREAMING-LABEL: test_str_lane_s8:
110
- ; CHECK-NONSTREAMING: // %bb.0: // %entry
111
- ; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
112
- ; CHECK-NONSTREAMING-NEXT: strb w8, [x0]
113
- ; CHECK-NONSTREAMING-NEXT: ret
114
- ;
115
- ; STREAMING-COMPAT-LABEL: test_str_lane_s8:
116
- ; STREAMING-COMPAT: // %bb.0: // %entry
117
- ; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
118
- ; STREAMING-COMPAT-NEXT: fmov w8, s0
119
- ; STREAMING-COMPAT-NEXT: strb w8, [x0]
120
- ; STREAMING-COMPAT-NEXT: ret
109
+ ; CHECK-LABEL: test_str_lane_s8:
110
+ ; CHECK: // %bb.0: // %entry
111
+ ; CHECK-NEXT: mov z0.b, z0.b[7]
112
+ ; CHECK-NEXT: str b0, [x0]
113
+ ; CHECK-NEXT: ret
121
114
122
115
entry:
123
116
%0 = extractelement <vscale x 16 x i8 > %b , i32 7
@@ -128,8 +121,7 @@ entry:
128
121
define void @test_str_lane0_s8 (ptr %a , <vscale x 16 x i8 > %b ) {
129
122
; CHECK-LABEL: test_str_lane0_s8:
130
123
; CHECK: // %bb.0: // %entry
131
- ; CHECK-NEXT: fmov w8, s0
132
- ; CHECK-NEXT: strb w8, [x0]
124
+ ; CHECK-NEXT: str b0, [x0]
133
125
; CHECK-NEXT: ret
134
126
135
127
entry:
@@ -201,6 +193,19 @@ define void @test_str_reduction_i32_to_i16(ptr %ptr, <vscale x 4 x i1> %p0, <vsc
201
193
ret void
202
194
}
203
195
196
+ define void @test_str_reduction_i32_to_i8 (ptr %ptr , <vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v ) {
197
+ ; CHECK-LABEL: test_str_reduction_i32_to_i8:
198
+ ; CHECK: // %bb.0:
199
+ ; CHECK-NEXT: uaddv d0, p0, z0.s
200
+ ; CHECK-NEXT: str b0, [x0]
201
+ ; CHECK-NEXT: ret
202
+
203
+ %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32 (<vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v )
204
+ %trunc = trunc i64 %reduce to i8
205
+ store i8 %trunc , ptr %ptr , align 1
206
+ ret void
207
+ }
208
+
204
209
define void @test_str_reduction_i32_to_i32_negative_offset (ptr %ptr , <vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v ) {
205
210
; CHECK-LABEL: test_str_reduction_i32_to_i32_negative_offset:
206
211
; CHECK: // %bb.0:
@@ -242,6 +247,20 @@ define void @test_str_reduction_i32_to_i16_negative_offset(ptr %ptr, <vscale x 4
242
247
ret void
243
248
}
244
249
250
+ define void @test_str_reduction_i32_to_i8_negative_offset (ptr %ptr , <vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v ) {
251
+ ; CHECK-LABEL: test_str_reduction_i32_to_i8_negative_offset:
252
+ ; CHECK: // %bb.0:
253
+ ; CHECK-NEXT: uaddv d0, p0, z0.s
254
+ ; CHECK-NEXT: stur b0, [x0, #-8]
255
+ ; CHECK-NEXT: ret
256
+
257
+ %reduce = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32 (<vscale x 4 x i1 > %p0 , <vscale x 4 x i32 > %v )
258
+ %trunc = trunc i64 %reduce to i8
259
+ %out_ptr = getelementptr inbounds i8 , ptr %ptr , i64 -8
260
+ store i8 %trunc , ptr %out_ptr , align 1
261
+ ret void
262
+ }
263
+
245
264
define void @test_str_lane_s32_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
246
265
; CHECK-LABEL: test_str_lane_s32_negative_offset:
247
266
; CHECK: // %bb.0: // %entry
@@ -297,18 +316,11 @@ entry:
297
316
}
298
317
299
318
define void @test_str_lane_s8_negative_offset (ptr %a , <vscale x 16 x i8 > %b ) {
300
- ; CHECK-NONSTREAMING-LABEL: test_str_lane_s8_negative_offset:
301
- ; CHECK-NONSTREAMING: // %bb.0: // %entry
302
- ; CHECK-NONSTREAMING-NEXT: umov w8, v0.b[7]
303
- ; CHECK-NONSTREAMING-NEXT: sturb w8, [x0, #-8]
304
- ; CHECK-NONSTREAMING-NEXT: ret
305
- ;
306
- ; STREAMING-COMPAT-LABEL: test_str_lane_s8_negative_offset:
307
- ; STREAMING-COMPAT: // %bb.0: // %entry
308
- ; STREAMING-COMPAT-NEXT: mov z0.b, z0.b[7]
309
- ; STREAMING-COMPAT-NEXT: fmov w8, s0
310
- ; STREAMING-COMPAT-NEXT: sturb w8, [x0, #-8]
311
- ; STREAMING-COMPAT-NEXT: ret
319
+ ; CHECK-LABEL: test_str_lane_s8_negative_offset:
320
+ ; CHECK: // %bb.0: // %entry
321
+ ; CHECK-NEXT: mov z0.b, z0.b[7]
322
+ ; CHECK-NEXT: stur b0, [x0, #-8]
323
+ ; CHECK-NEXT: ret
312
324
313
325
entry:
314
326
%0 = extractelement <vscale x 16 x i8 > %b , i32 7
@@ -320,8 +332,7 @@ entry:
320
332
define void @test_str_lane0_s8_negative_offset (ptr %a , <vscale x 16 x i8 > %b ) {
321
333
; CHECK-LABEL: test_str_lane0_s8_negative_offset:
322
334
; CHECK: // %bb.0: // %entry
323
- ; CHECK-NEXT: fmov w8, s0
324
- ; CHECK-NEXT: sturb w8, [x0, #-8]
335
+ ; CHECK-NEXT: stur b0, [x0, #-8]
325
336
; CHECK-NEXT: ret
326
337
327
338
entry:
@@ -385,6 +396,48 @@ entry:
385
396
ret void
386
397
}
387
398
399
+
400
+ define void @test_str_trunc_lane_s32_to_s8 (ptr %a , <vscale x 4 x i32 > %b ) {
401
+ ; CHECK-LABEL: test_str_trunc_lane_s32_to_s8:
402
+ ; CHECK: // %bb.0: // %entry
403
+ ; CHECK-NEXT: mov z0.s, z0.s[3]
404
+ ; CHECK-NEXT: str b0, [x0]
405
+ ; CHECK-NEXT: ret
406
+
407
+ entry:
408
+ %0 = extractelement <vscale x 4 x i32 > %b , i32 3
409
+ %trunc = trunc i32 %0 to i8
410
+ store i8 %trunc , ptr %a , align 1
411
+ ret void
412
+ }
413
+
414
+ define void @test_str_trunc_lane0_s32_to_s8 (ptr %a , <vscale x 4 x i32 > %b ) {
415
+ ; CHECK-LABEL: test_str_trunc_lane0_s32_to_s8:
416
+ ; CHECK: // %bb.0: // %entry
417
+ ; CHECK-NEXT: str b0, [x0]
418
+ ; CHECK-NEXT: ret
419
+
420
+ entry:
421
+ %0 = extractelement <vscale x 4 x i32 > %b , i32 0
422
+ %trunc = trunc i32 %0 to i8
423
+ store i8 %trunc , ptr %a , align 1
424
+ ret void
425
+ }
426
+
427
+ define void @test_str_trunc_lane_s64_to_s8 (ptr %a , <vscale x 2 x i64 > %b ) {
428
+ ; CHECK-LABEL: test_str_trunc_lane_s64_to_s8:
429
+ ; CHECK: // %bb.0: // %entry
430
+ ; CHECK-NEXT: mov z0.d, z0.d[3]
431
+ ; CHECK-NEXT: str b0, [x0]
432
+ ; CHECK-NEXT: ret
433
+
434
+ entry:
435
+ %0 = extractelement <vscale x 2 x i64 > %b , i32 3
436
+ %trunc = trunc i64 %0 to i8
437
+ store i8 %trunc , ptr %a , align 1
438
+ ret void
439
+ }
440
+
388
441
define void @test_str_trunc_lane_s32_to_s16_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
389
442
; CHECK-LABEL: test_str_trunc_lane_s32_to_s16_negative_offset:
390
443
; CHECK: // %bb.0: // %entry
@@ -413,3 +466,47 @@ entry:
413
466
store i16 %trunc , ptr %out_ptr , align 2
414
467
ret void
415
468
}
469
+
470
+ define void @test_str_trunc_lane_s32_to_s8_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
471
+ ; CHECK-LABEL: test_str_trunc_lane_s32_to_s8_negative_offset:
472
+ ; CHECK: // %bb.0: // %entry
473
+ ; CHECK-NEXT: mov z0.s, z0.s[3]
474
+ ; CHECK-NEXT: stur b0, [x0, #-8]
475
+ ; CHECK-NEXT: ret
476
+
477
+ entry:
478
+ %0 = extractelement <vscale x 4 x i32 > %b , i32 3
479
+ %trunc = trunc i32 %0 to i8
480
+ %out_ptr = getelementptr inbounds i8 , ptr %a , i64 -8
481
+ store i8 %trunc , ptr %out_ptr , align 1
482
+ ret void
483
+ }
484
+
485
+ define void @test_str_trunc_lane0_s32_to_s8_negative_offset (ptr %a , <vscale x 4 x i32 > %b ) {
486
+ ; CHECK-LABEL: test_str_trunc_lane0_s32_to_s8_negative_offset:
487
+ ; CHECK: // %bb.0: // %entry
488
+ ; CHECK-NEXT: stur b0, [x0, #-8]
489
+ ; CHECK-NEXT: ret
490
+
491
+ entry:
492
+ %0 = extractelement <vscale x 4 x i32 > %b , i32 0
493
+ %trunc = trunc i32 %0 to i8
494
+ %out_ptr = getelementptr inbounds i8 , ptr %a , i64 -8
495
+ store i8 %trunc , ptr %out_ptr , align 1
496
+ ret void
497
+ }
498
+
499
+ define void @test_str_trunc_lane_s64_to_s8_negative_offset (ptr %a , <vscale x 2 x i64 > %b ) {
500
+ ; CHECK-LABEL: test_str_trunc_lane_s64_to_s8_negative_offset:
501
+ ; CHECK: // %bb.0: // %entry
502
+ ; CHECK-NEXT: mov z0.d, z0.d[3]
503
+ ; CHECK-NEXT: stur b0, [x0, #-8]
504
+ ; CHECK-NEXT: ret
505
+
506
+ entry:
507
+ %0 = extractelement <vscale x 2 x i64 > %b , i32 3
508
+ %trunc = trunc i64 %0 to i8
509
+ %out_ptr = getelementptr inbounds i8 , ptr %a , i64 -8
510
+ store i8 %trunc , ptr %out_ptr , align 1
511
+ ret void
512
+ }
0 commit comments