; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
- ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,V
- ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
+ ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V
+ ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH

declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
declare void @g()
@@ -135,6 +135,48 @@ define void @i8_i16(ptr %p, ptr %q) {
ret void
}

+ define void @i8_i16_rotate(ptr %p, ptr %q) {
+ ; CHECK-LABEL: i8_i16_rotate:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: .cfi_offset ra, -8
+ ; CHECK-NEXT: .cfi_offset s0, -16
+ ; CHECK-NEXT: .cfi_offset s1, -24
+ ; CHECK-NEXT: .cfi_offset s2, -32
+ ; CHECK-NEXT: lbu s1, 0(a0)
+ ; CHECK-NEXT: lbu s2, 1(a0)
+ ; CHECK-NEXT: mv s0, a1
+ ; CHECK-NEXT: call g
+ ; CHECK-NEXT: sb s2, 0(s0)
+ ; CHECK-NEXT: sb s1, 1(s0)
+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: .cfi_restore ra
+ ; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore s1
+ ; CHECK-NEXT: .cfi_restore s2
+ ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 1
+ %x0 = load i8, ptr %p0, align 2
+ %x1 = load i8, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 1
+ store i8 %x1, ptr %q0, align 2
+ store i8 %x0, ptr %q1
+ ret void
+ }
+
; We could reorder the first call and the load here to enable
; merging, but don't currently do so.
define void @i8_i16_resched_readnone_ld(ptr %p, ptr %q) {
@@ -228,6 +270,78 @@ define void @i8_i16_resched_readnone_st(ptr %p, ptr %q) {
ret void
}

+ define void @i32_i64(ptr %p, ptr %q) {
+ ; CHECK-LABEL: i32_i64:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: .cfi_offset ra, -8
+ ; CHECK-NEXT: .cfi_offset s0, -16
+ ; CHECK-NEXT: .cfi_offset s1, -24
+ ; CHECK-NEXT: ld s1, 0(a0)
+ ; CHECK-NEXT: mv s0, a1
+ ; CHECK-NEXT: call g
+ ; CHECK-NEXT: sd s1, 0(s0)
+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: .cfi_restore ra
+ ; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore s1
+ ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load i32, ptr %p0, align 8
+ %x1 = load i32, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store i32 %x0, ptr %q0, align 8
+ store i32 %x1, ptr %q1
+ ret void
+ }
+
+ define void @i32_i64_rotate(ptr %p, ptr %q) {
+ ; CHECK-LABEL: i32_i64_rotate:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: .cfi_offset ra, -8
+ ; CHECK-NEXT: .cfi_offset s0, -16
+ ; CHECK-NEXT: .cfi_offset s1, -24
+ ; CHECK-NEXT: mv s0, a1
+ ; CHECK-NEXT: ld a0, 0(a0)
+ ; CHECK-NEXT: rori s1, a0, 32
+ ; CHECK-NEXT: call g
+ ; CHECK-NEXT: sd s1, 0(s0)
+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: .cfi_restore ra
+ ; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore s1
+ ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load i32, ptr %p0, align 8
+ %x1 = load i32, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store i32 %x1, ptr %q0, align 8
+ store i32 %x0, ptr %q1
+ ret void
+ }

; Merging vectors is profitable, it reduces pressure within a single
; register class.
@@ -305,8 +419,7 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
; CHECK-NEXT: vsetvli zero, s1, e8, m2, ta, ma
; CHECK-NEXT: vse8.v v8, (s0)
; CHECK-NEXT: csrr a0, vlenb
- ; CHECK-NEXT: slli a0, a0, 1
- ; CHECK-NEXT: add sp, sp, a0
+ ; CHECK-NEXT: sh1add sp, a0, sp
; CHECK-NEXT: .cfi_def_cfa sp, 64
; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -329,9 +442,44 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
ret void
}

- ; TODO: We fail to merge these, which would be profitable.
define void @two_half(ptr %p, ptr %q) {
- ; V-LABEL: two_half:
+ ; CHECK-LABEL: two_half:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: .cfi_offset ra, -8
+ ; CHECK-NEXT: .cfi_offset s0, -16
+ ; CHECK-NEXT: .cfi_offset s1, -24
+ ; CHECK-NEXT: lw s1, 0(a0)
+ ; CHECK-NEXT: mv s0, a1
+ ; CHECK-NEXT: call g
+ ; CHECK-NEXT: sw s1, 0(s0)
+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: .cfi_restore ra
+ ; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore s1
+ ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 2
+ %x0 = load half, ptr %p0, align 4
+ %x1 = load half, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 2
+ store half %x0, ptr %q0, align 4
+ store half %x1, ptr %q1
+ ret void
+ }
+
+ define void @two_half_unaligned(ptr %p, ptr %q) {
+ ; V-LABEL: two_half_unaligned:
; V: # %bb.0:
; V-NEXT: addi sp, sp, -32
; V-NEXT: .cfi_def_cfa_offset 32
@@ -361,7 +509,7 @@ define void @two_half(ptr %p, ptr %q) {
; V-NEXT: .cfi_def_cfa_offset 0
; V-NEXT: ret
;
- ; ZVFH-LABEL: two_half:
+ ; ZVFH-LABEL: two_half_unaligned:
; ZVFH: # %bb.0:
; ZVFH-NEXT: addi sp, sp, -32
; ZVFH-NEXT: .cfi_def_cfa_offset 32
@@ -404,6 +552,7 @@ define void @two_half(ptr %p, ptr %q) {
ret void
}

+
; TODO: This one is currently a vector which is unprofitable, we should
; use i64 instead.
define void @two_float(ptr %p, ptr %q) {
@@ -413,6 +562,42 @@ define void @two_float(ptr %p, ptr %q) {
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: .cfi_offset ra, -8
+ ; CHECK-NEXT: .cfi_offset s0, -16
+ ; CHECK-NEXT: .cfi_offset s1, -24
+ ; CHECK-NEXT: ld s1, 0(a0)
+ ; CHECK-NEXT: mv s0, a1
+ ; CHECK-NEXT: call g
+ ; CHECK-NEXT: sd s1, 0(s0)
+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: .cfi_restore ra
+ ; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore s1
+ ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load float, ptr %p0, align 8
+ %x1 = load float, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store float %x0, ptr %q0, align 8
+ store float %x1, ptr %q1
+ ret void
+ }
+
+ define void @two_float_unaligned(ptr %p, ptr %q) {
+ ; CHECK-LABEL: two_float_unaligned:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: csrr a2, vlenb
@@ -450,6 +635,43 @@ define void @two_float(ptr %p, ptr %q) {
ret void
}

+ define void @two_float_rotate(ptr %p, ptr %q) {
+ ; CHECK-LABEL: two_float_rotate:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: addi sp, sp, -32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 32
+ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+ ; CHECK-NEXT: .cfi_offset ra, -8
+ ; CHECK-NEXT: .cfi_offset s0, -16
+ ; CHECK-NEXT: .cfi_offset s1, -24
+ ; CHECK-NEXT: mv s0, a1
+ ; CHECK-NEXT: ld a0, 0(a0)
+ ; CHECK-NEXT: rori s1, a0, 32
+ ; CHECK-NEXT: call g
+ ; CHECK-NEXT: sd s1, 0(s0)
+ ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+ ; CHECK-NEXT: .cfi_restore ra
+ ; CHECK-NEXT: .cfi_restore s0
+ ; CHECK-NEXT: .cfi_restore s1
+ ; CHECK-NEXT: addi sp, sp, 32
+ ; CHECK-NEXT: .cfi_def_cfa_offset 0
+ ; CHECK-NEXT: ret
+ %p0 = getelementptr i8, ptr %p, i64 0
+ %p1 = getelementptr i8, ptr %p, i64 4
+ %x0 = load float, ptr %p0, align 8
+ %x1 = load float, ptr %p1
+ call void @g()
+ %q0 = getelementptr i8, ptr %q, i64 0
+ %q1 = getelementptr i8, ptr %q, i64 4
+ store float %x1, ptr %q0, align 8
+ store float %x0, ptr %q1
+ ret void
+ }
+
define void @two_double(ptr %p, ptr %q) {
; CHECK-LABEL: two_double:
; CHECK: # %bb.0:
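
; A note on the rotate tests above: rotating a 64-bit value by 32 swaps its
; two 32-bit halves, so a pair of adjacent 32-bit loads whose stores land in
; swapped order can be merged into one 64-bit load, a rotate, and one 64-bit
; store. With Zbb available (part of the +b attribute the updated RUN lines
; pass), the rotate lowers to the single rori seen in the i32_i64_rotate and
; two_float_rotate bodies. A minimal IR sketch of that merged form, using the
; illustrative name @merged_form (not part of the test file):
;
; define void @merged_form(ptr %p, ptr %q) {
;   %x = load i64, ptr %p, align 8
;   ; fshl with equal operands is a rotate left; by 32 on i64 it equals
;   ; a rotate right by 32, i.e. rori with immediate 32 on RV64+Zbb.
;   %r = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 32)
;   store i64 %r, ptr %q, align 8
;   ret void
; }
; declare i64 @llvm.fshl.i64(i64, i64, i64)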