|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
|
2 | 2 | ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
|
3 | 3 |
|
4 |
| -; FIXME: The i32 load and store pair isn't dead and shouldn't be omitted. |
| 4 | +; This showcases a miscompile that was fixed in #90573: |
| 5 | +; - The memset will be type-legalized to a 512 bit store + 2 x 128 bit stores. |
| 6 | +; - The load and store of q alias the upper 128-bit store into p. |
| 7 | +; - The aliasing 128-bit store will be chained between the scalar |
| 8 | +; load and the scalar store: |
| 9 | +; |
| 10 | +; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ... |
| 11 | +; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ... |
| 12 | +; |
| 13 | +; t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ... |
| 14 | +; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ... |
| 15 | +; t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ... |
| 16 | +; |
| 17 | +; Previously, the scalar load/store was incorrectly combined away: |
| 18 | +; |
| 19 | +; t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ... |
| 20 | +; t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ... |
| 21 | +; |
| 22 | +; // MISSING |
| 23 | +; t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ... |
| 24 | +; // MISSING |
| 25 | +; See also pr83017.ll: this test is the same code, but relies on vscale_range |
| 26 | +; instead of -riscv-v-vector-bits-max=128. |
5 | 27 | define void @f(ptr %p) vscale_range(2,2) {
|
6 | 28 | ; CHECK-LABEL: f:
|
7 | 29 | ; CHECK: # %bb.0:
|
8 |
| -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma |
9 |
| -; CHECK-NEXT: vmv.v.i v8, 0 |
10 |
| -; CHECK-NEXT: vs4r.v v8, (a0) |
11 |
| -; CHECK-NEXT: addi a1, a0, 80 |
| 30 | +; CHECK-NEXT: lw a1, 84(a0) |
| 31 | +; CHECK-NEXT: addi a2, a0, 80 |
12 | 32 | ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
|
13 | 33 | ; CHECK-NEXT: vmv.v.i v8, 0
|
14 |
| -; CHECK-NEXT: vs1r.v v8, (a1) |
15 |
| -; CHECK-NEXT: addi a0, a0, 64 |
16 |
| -; CHECK-NEXT: vs1r.v v8, (a0) |
| 34 | +; CHECK-NEXT: vs1r.v v8, (a2) |
| 35 | +; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma |
| 36 | +; CHECK-NEXT: vmv.v.i v12, 0 |
| 37 | +; CHECK-NEXT: vs4r.v v12, (a0) |
| 38 | +; CHECK-NEXT: addi a2, a0, 64 |
| 39 | +; CHECK-NEXT: vs1r.v v8, (a2) |
| 40 | +; CHECK-NEXT: sw a1, 84(a0) |
17 | 41 | ; CHECK-NEXT: ret
|
18 | 42 | %q = getelementptr inbounds i8, ptr %p, i64 84
|
19 | 43 | %x = load i32, ptr %q
|
|
0 commit comments