Skip to content

Commit 13c2378

Browse files
committed
[RISCV] Expand test coverage for DAG store merging
Two sets of additions: 1) exercise the rotation path, both for integer and float; 2) exercise the aligned and unaligned paths separately.
1 parent c4eec9e commit 13c2378

File tree

1 file changed

+229
-7
lines changed

1 file changed

+229
-7
lines changed

llvm/test/CodeGen/RISCV/stores-of-loads-merging.ll renamed to llvm/test/CodeGen/RISCV/rvv/stores-of-loads-merging.ll

Lines changed: 229 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,V
3-
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b | FileCheck %s --check-prefixes=CHECK,V
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+b,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
44

55
declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
66
declare void @g()
@@ -135,6 +135,48 @@ define void @i8_i16(ptr %p, ptr %q) {
135135
ret void
136136
}
137137

138+
define void @i8_i16_rotate(ptr %p, ptr %q) {
139+
; CHECK-LABEL: i8_i16_rotate:
140+
; CHECK: # %bb.0:
141+
; CHECK-NEXT: addi sp, sp, -32
142+
; CHECK-NEXT: .cfi_def_cfa_offset 32
143+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
144+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
145+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
146+
; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
147+
; CHECK-NEXT: .cfi_offset ra, -8
148+
; CHECK-NEXT: .cfi_offset s0, -16
149+
; CHECK-NEXT: .cfi_offset s1, -24
150+
; CHECK-NEXT: .cfi_offset s2, -32
151+
; CHECK-NEXT: lbu s1, 0(a0)
152+
; CHECK-NEXT: lbu s2, 1(a0)
153+
; CHECK-NEXT: mv s0, a1
154+
; CHECK-NEXT: call g
155+
; CHECK-NEXT: sb s2, 0(s0)
156+
; CHECK-NEXT: sb s1, 1(s0)
157+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
158+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
159+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
160+
; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
161+
; CHECK-NEXT: .cfi_restore ra
162+
; CHECK-NEXT: .cfi_restore s0
163+
; CHECK-NEXT: .cfi_restore s1
164+
; CHECK-NEXT: .cfi_restore s2
165+
; CHECK-NEXT: addi sp, sp, 32
166+
; CHECK-NEXT: .cfi_def_cfa_offset 0
167+
; CHECK-NEXT: ret
168+
%p0 = getelementptr i8, ptr %p, i64 0
169+
%p1 = getelementptr i8, ptr %p, i64 1
170+
%x0 = load i8, ptr %p0, align 2
171+
%x1 = load i8, ptr %p1
172+
call void @g()
173+
%q0 = getelementptr i8, ptr %q, i64 0
174+
%q1 = getelementptr i8, ptr %q, i64 1
175+
store i8 %x1, ptr %q0, align 2
176+
store i8 %x0, ptr %q1
177+
ret void
178+
}
179+
138180
; We could reorder the first call and the load here to enable
139181
; merging, but don't currently do so.
140182
define void @i8_i16_resched_readnone_ld(ptr %p, ptr %q) {
@@ -228,6 +270,78 @@ define void @i8_i16_resched_readnone_st(ptr %p, ptr %q) {
228270
ret void
229271
}
230272

273+
define void @i32_i64(ptr %p, ptr %q) {
274+
; CHECK-LABEL: i32_i64:
275+
; CHECK: # %bb.0:
276+
; CHECK-NEXT: addi sp, sp, -32
277+
; CHECK-NEXT: .cfi_def_cfa_offset 32
278+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
279+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
280+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
281+
; CHECK-NEXT: .cfi_offset ra, -8
282+
; CHECK-NEXT: .cfi_offset s0, -16
283+
; CHECK-NEXT: .cfi_offset s1, -24
284+
; CHECK-NEXT: ld s1, 0(a0)
285+
; CHECK-NEXT: mv s0, a1
286+
; CHECK-NEXT: call g
287+
; CHECK-NEXT: sd s1, 0(s0)
288+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
289+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
290+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
291+
; CHECK-NEXT: .cfi_restore ra
292+
; CHECK-NEXT: .cfi_restore s0
293+
; CHECK-NEXT: .cfi_restore s1
294+
; CHECK-NEXT: addi sp, sp, 32
295+
; CHECK-NEXT: .cfi_def_cfa_offset 0
296+
; CHECK-NEXT: ret
297+
%p0 = getelementptr i8, ptr %p, i64 0
298+
%p1 = getelementptr i8, ptr %p, i64 4
299+
%x0 = load i32, ptr %p0, align 8
300+
%x1 = load i32, ptr %p1
301+
call void @g()
302+
%q0 = getelementptr i8, ptr %q, i64 0
303+
%q1 = getelementptr i8, ptr %q, i64 4
304+
store i32 %x0, ptr %q0, align 8
305+
store i32 %x1, ptr %q1
306+
ret void
307+
}
308+
309+
define void @i32_i64_rotate(ptr %p, ptr %q) {
310+
; CHECK-LABEL: i32_i64_rotate:
311+
; CHECK: # %bb.0:
312+
; CHECK-NEXT: addi sp, sp, -32
313+
; CHECK-NEXT: .cfi_def_cfa_offset 32
314+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
315+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
316+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
317+
; CHECK-NEXT: .cfi_offset ra, -8
318+
; CHECK-NEXT: .cfi_offset s0, -16
319+
; CHECK-NEXT: .cfi_offset s1, -24
320+
; CHECK-NEXT: mv s0, a1
321+
; CHECK-NEXT: ld a0, 0(a0)
322+
; CHECK-NEXT: rori s1, a0, 32
323+
; CHECK-NEXT: call g
324+
; CHECK-NEXT: sd s1, 0(s0)
325+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
326+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
327+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
328+
; CHECK-NEXT: .cfi_restore ra
329+
; CHECK-NEXT: .cfi_restore s0
330+
; CHECK-NEXT: .cfi_restore s1
331+
; CHECK-NEXT: addi sp, sp, 32
332+
; CHECK-NEXT: .cfi_def_cfa_offset 0
333+
; CHECK-NEXT: ret
334+
%p0 = getelementptr i8, ptr %p, i64 0
335+
%p1 = getelementptr i8, ptr %p, i64 4
336+
%x0 = load i32, ptr %p0, align 8
337+
%x1 = load i32, ptr %p1
338+
call void @g()
339+
%q0 = getelementptr i8, ptr %q, i64 0
340+
%q1 = getelementptr i8, ptr %q, i64 4
341+
store i32 %x1, ptr %q0, align 8
342+
store i32 %x0, ptr %q1
343+
ret void
344+
}
231345

232346
; Merging vectors is profitable, it reduces pressure within a single
233347
; register class.
@@ -305,8 +419,7 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
305419
; CHECK-NEXT: vsetvli zero, s1, e8, m2, ta, ma
306420
; CHECK-NEXT: vse8.v v8, (s0)
307421
; CHECK-NEXT: csrr a0, vlenb
308-
; CHECK-NEXT: slli a0, a0, 1
309-
; CHECK-NEXT: add sp, sp, a0
422+
; CHECK-NEXT: sh1add sp, a0, sp
310423
; CHECK-NEXT: .cfi_def_cfa sp, 64
311424
; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
312425
; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -329,9 +442,44 @@ define void @v16i8_v32i8(ptr %p, ptr %q) {
329442
ret void
330443
}
331444

332-
; TODO: We fail to merge these, which would be profitable.
333445
define void @two_half(ptr %p, ptr %q) {
334-
; V-LABEL: two_half:
446+
; CHECK-LABEL: two_half:
447+
; CHECK: # %bb.0:
448+
; CHECK-NEXT: addi sp, sp, -32
449+
; CHECK-NEXT: .cfi_def_cfa_offset 32
450+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
451+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
452+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
453+
; CHECK-NEXT: .cfi_offset ra, -8
454+
; CHECK-NEXT: .cfi_offset s0, -16
455+
; CHECK-NEXT: .cfi_offset s1, -24
456+
; CHECK-NEXT: lw s1, 0(a0)
457+
; CHECK-NEXT: mv s0, a1
458+
; CHECK-NEXT: call g
459+
; CHECK-NEXT: sw s1, 0(s0)
460+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
461+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
462+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
463+
; CHECK-NEXT: .cfi_restore ra
464+
; CHECK-NEXT: .cfi_restore s0
465+
; CHECK-NEXT: .cfi_restore s1
466+
; CHECK-NEXT: addi sp, sp, 32
467+
; CHECK-NEXT: .cfi_def_cfa_offset 0
468+
; CHECK-NEXT: ret
469+
%p0 = getelementptr i8, ptr %p, i64 0
470+
%p1 = getelementptr i8, ptr %p, i64 2
471+
%x0 = load half, ptr %p0, align 4
472+
%x1 = load half, ptr %p1
473+
call void @g()
474+
%q0 = getelementptr i8, ptr %q, i64 0
475+
%q1 = getelementptr i8, ptr %q, i64 2
476+
store half %x0, ptr %q0, align 4
477+
store half %x1, ptr %q1
478+
ret void
479+
}
480+
481+
define void @two_half_unaligned(ptr %p, ptr %q) {
482+
; V-LABEL: two_half_unaligned:
335483
; V: # %bb.0:
336484
; V-NEXT: addi sp, sp, -32
337485
; V-NEXT: .cfi_def_cfa_offset 32
@@ -361,7 +509,7 @@ define void @two_half(ptr %p, ptr %q) {
361509
; V-NEXT: .cfi_def_cfa_offset 0
362510
; V-NEXT: ret
363511
;
364-
; ZVFH-LABEL: two_half:
512+
; ZVFH-LABEL: two_half_unaligned:
365513
; ZVFH: # %bb.0:
366514
; ZVFH-NEXT: addi sp, sp, -32
367515
; ZVFH-NEXT: .cfi_def_cfa_offset 32
@@ -404,6 +552,7 @@ define void @two_half(ptr %p, ptr %q) {
404552
ret void
405553
}
406554

555+
407556
; TODO: This one is currently a vector which is unprofitable, we should
408557
; use i64 instead.
409558
define void @two_float(ptr %p, ptr %q) {
@@ -413,6 +562,42 @@ define void @two_float(ptr %p, ptr %q) {
413562
; CHECK-NEXT: .cfi_def_cfa_offset 32
414563
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
415564
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
565+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
566+
; CHECK-NEXT: .cfi_offset ra, -8
567+
; CHECK-NEXT: .cfi_offset s0, -16
568+
; CHECK-NEXT: .cfi_offset s1, -24
569+
; CHECK-NEXT: ld s1, 0(a0)
570+
; CHECK-NEXT: mv s0, a1
571+
; CHECK-NEXT: call g
572+
; CHECK-NEXT: sd s1, 0(s0)
573+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
574+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
575+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
576+
; CHECK-NEXT: .cfi_restore ra
577+
; CHECK-NEXT: .cfi_restore s0
578+
; CHECK-NEXT: .cfi_restore s1
579+
; CHECK-NEXT: addi sp, sp, 32
580+
; CHECK-NEXT: .cfi_def_cfa_offset 0
581+
; CHECK-NEXT: ret
582+
%p0 = getelementptr i8, ptr %p, i64 0
583+
%p1 = getelementptr i8, ptr %p, i64 4
584+
%x0 = load float, ptr %p0, align 8
585+
%x1 = load float, ptr %p1
586+
call void @g()
587+
%q0 = getelementptr i8, ptr %q, i64 0
588+
%q1 = getelementptr i8, ptr %q, i64 4
589+
store float %x0, ptr %q0, align 8
590+
store float %x1, ptr %q1
591+
ret void
592+
}
593+
594+
define void @two_float_unaligned(ptr %p, ptr %q) {
595+
; CHECK-LABEL: two_float_unaligned:
596+
; CHECK: # %bb.0:
597+
; CHECK-NEXT: addi sp, sp, -32
598+
; CHECK-NEXT: .cfi_def_cfa_offset 32
599+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
600+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
416601
; CHECK-NEXT: .cfi_offset ra, -8
417602
; CHECK-NEXT: .cfi_offset s0, -16
418603
; CHECK-NEXT: csrr a2, vlenb
@@ -450,6 +635,43 @@ define void @two_float(ptr %p, ptr %q) {
450635
ret void
451636
}
452637

638+
define void @two_float_rotate(ptr %p, ptr %q) {
639+
; CHECK-LABEL: two_float_rotate:
640+
; CHECK: # %bb.0:
641+
; CHECK-NEXT: addi sp, sp, -32
642+
; CHECK-NEXT: .cfi_def_cfa_offset 32
643+
; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
644+
; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
645+
; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
646+
; CHECK-NEXT: .cfi_offset ra, -8
647+
; CHECK-NEXT: .cfi_offset s0, -16
648+
; CHECK-NEXT: .cfi_offset s1, -24
649+
; CHECK-NEXT: mv s0, a1
650+
; CHECK-NEXT: ld a0, 0(a0)
651+
; CHECK-NEXT: rori s1, a0, 32
652+
; CHECK-NEXT: call g
653+
; CHECK-NEXT: sd s1, 0(s0)
654+
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
655+
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
656+
; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
657+
; CHECK-NEXT: .cfi_restore ra
658+
; CHECK-NEXT: .cfi_restore s0
659+
; CHECK-NEXT: .cfi_restore s1
660+
; CHECK-NEXT: addi sp, sp, 32
661+
; CHECK-NEXT: .cfi_def_cfa_offset 0
662+
; CHECK-NEXT: ret
663+
%p0 = getelementptr i8, ptr %p, i64 0
664+
%p1 = getelementptr i8, ptr %p, i64 4
665+
%x0 = load float, ptr %p0, align 8
666+
%x1 = load float, ptr %p1
667+
call void @g()
668+
%q0 = getelementptr i8, ptr %q, i64 0
669+
%q1 = getelementptr i8, ptr %q, i64 4
670+
store float %x1, ptr %q0, align 8
671+
store float %x0, ptr %q1
672+
ret void
673+
}
674+
453675
define void @two_double(ptr %p, ptr %q) {
454676
; CHECK-LABEL: two_double:
455677
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)