Commit 3b48d84

[RISCV] Optimize more redundant VSETVLIs

D99717 introduced some test cases which showed that feeding the output of one
vsetvli into another would not be picked up by the RISCVCleanupVSETVLI pass.
This patch teaches the optimization about such a pattern. The pattern is quite
common when using the RVV vsetvli intrinsic to pass the VL on to other
intrinsics.

The second test case introduced by D99717 is left unoptimized by this patch.
It is a rarer case and will require us to rewire any uses of the redundant
vset[i]vli's output to the previous one's.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D99730

1 parent a4ac847 commit 3b48d84
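
The pattern the patch targets arises when the VL returned by the vsetvli
intrinsic is threaded into a later vector intrinsic. A minimal sketch follows;
intrinsic spellings vary between RVV intrinsics spec drafts, so treat the
names here as illustrative rather than authoritative:

    #include <riscv_vector.h>

    // The explicit vsetvl computes a VL; passing that VL into the load makes
    // the backend emit a second vsetvli whose AVL is the first one's output.
    vint32m2_t load_with_avl(const int32_t *ptr, size_t avl) {
      size_t vl = vsetvl_e32m2(avl);   // first vsetvli
      return vle32_v_i32m2(ptr, vl);   // lowers with its own vsetvli taking vl
    }

Before this patch the second vsetvli survived cleanup; the pass now recognizes
that an instruction whose AVL is exactly the previous vsetvli's output cannot
be changing VL, and deletes it, as the .ll tests below check.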

File tree: 4 files changed, +58 -16 lines changed

llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp

Lines changed: 8 additions & 1 deletion
@@ -75,11 +75,19 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
 
   assert(MI.getOpcode() == RISCV::PseudoVSETVLI);
   Register AVLReg = MI.getOperand(1).getReg();
+  Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
 
   // If this VSETVLI isn't changing VL, it is redundant.
   if (AVLReg == RISCV::X0 && MI.getOperand(0).getReg() == RISCV::X0)
     return true;
 
+  // If the previous VSET{I}VLI's output (which isn't X0) is fed into this
+  // VSETVLI, this one isn't changing VL so is redundant.
+  // Only perform this on virtual registers to avoid the complexity of having
+  // to work out if the physical register was clobbered somewhere in between.
+  if (AVLReg.isVirtual() && AVLReg == PrevOutVL)
+    return true;
+
   // If the previous opcode isn't vsetvli we can't do any more comparison.
   if (PrevVSETVLI->getOpcode() != RISCV::PseudoVSETVLI)
     return false;
@@ -94,7 +102,6 @@ static bool isRedundantVSETVLI(MachineInstr &MI, MachineInstr *PrevVSETVLI) {
   // This instruction is setting VL to VLMAX, this is redundant if the
   // previous VSETVLI was also setting VL to VLMAX. But it is not redundant
   // if they were setting it to any other value or leaving VL unchanged.
-  Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
   return PrevOutVL != RISCV::X0;
 }
 
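For context, isRedundantVSETVLI is called from a per-block scan that remembers
the most recent VSET{I}VLI. The following is a minimal sketch of that driver
loop, not the verbatim pass; the function name and the exact invalidation
rules are illustrative. It shows why PrevVSETVLI is a single pointer that each
call compares against:

    // Sketch of the per-block driver (assumed shape, not the verbatim pass).
    static bool cleanupVSETVLIsInBlock(MachineBasicBlock &MBB) {
      bool Changed = false;
      // The most recent VSET{I}VLI whose VL/VTYPE state is still in effect.
      MachineInstr *PrevVSETVLI = nullptr;
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        unsigned Opc = MI.getOpcode();
        if (Opc != RISCV::PseudoVSETVLI && Opc != RISCV::PseudoVSETIVLI) {
          // Anything else writing VL or VTYPE invalidates what we remembered.
          if (MI.modifiesRegister(RISCV::VL, /*TRI=*/nullptr) ||
              MI.modifiesRegister(RISCV::VTYPE, /*TRI=*/nullptr))
            PrevVSETVLI = nullptr;
          continue;
        }
        if (PrevVSETVLI && Opc == RISCV::PseudoVSETVLI &&
            isRedundantVSETVLI(MI, PrevVSETVLI)) {
          // Redundant: erase MI; PrevVSETVLI remains the state in effect.
          MI.eraseFromParent();
          Changed = true;
          continue;
        }
        PrevVSETVLI = &MI;
      }
      return Changed;
    }

Erasing MI without updating PrevVSETVLI is what lets a whole chain of
redundant vsetvlis fold into the first one.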
llvm/test/CodeGen/RISCV/rvv/cleanup-vsetvli.mir

Lines changed: 46 additions & 7 deletions

@@ -1,25 +1,28 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc %s -mtriple=riscv64 -run-pass=riscv-cleanup-vsetvli -o - | FileCheck %s
 
-# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
-# keeps the previous value of VL, the second time sets it to VLMAX. We can't
-# remove the first since we can't tell if this is a change VL.
-
 --- |
   ; ModuleID = '../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll'
   source_filename = "../llvm/test/CodeGen/RISCV/rvv/add-vsetvli-vlmax.ll"
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
   target triple = "riscv64"
 
-  define void @cleanup_vsetvli() #0 {
+  define void @cleanup_vsetvli0() #0 {
+    ret void
+  }
+
+  define void @cleanup_vsetvli1() #0 {
     ret void
   }
 
   attributes #0 = { "target-features"="+experimental-v" }
 
 ...
 ---
-name: cleanup_vsetvli
+# Make sure we don't combine these two VSETVLIs in the cleanup pass. The first
+# keeps the previous value of VL, the second sets it to VLMAX. We can't remove
+# the first since we can't tell if this is a change of VL.
+name: cleanup_vsetvli0
 alignment: 4
 tracksRegLiveness: true
 registers:
@@ -29,7 +32,7 @@ frameInfo:
 machineFunctionInfo: {}
 body: |
   bb.0 (%ir-block.0):
-    ; CHECK-LABEL: name: cleanup_vsetvli
+    ; CHECK-LABEL: name: cleanup_vsetvli0
     ; CHECK: dead $x0 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
     ; CHECK: dead %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
     ; CHECK: PseudoRET
@@ -38,3 +41,39 @@ body: |
     PseudoRET
 
 ...
+---
+# 1. Ensure we can remove the second VSETVLI which takes its AVL from the first VSETVLI.
+# 2. Ensure we can remove the fourth VSETVLI which takes its AVL from the VSETIVLI.
+# 3. Make sure we don't combine the latter two VSETVLIs; the first outputs to a
+#    physical register which is clobbered by a later instruction.
+name: cleanup_vsetvli1
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0 (%ir-block.0):
+    liveins: $x3
+    ; CHECK-LABEL: name: cleanup_vsetvli1
+    ; CHECK: liveins: $x3
+    ; CHECK: [[PseudoVSETVLI:%[0-9]+]]:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: [[PseudoVSETIVLI:%[0-9]+]]:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $x1 = COPY $x3
+    ; CHECK: dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
+    ; CHECK: PseudoRET
+    %0:gpr = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    dead %1:gpr = PseudoVSETVLI %0, 12, implicit-def $vl, implicit-def $vtype
+
+    %2:gpr = PseudoVSETIVLI 4, 12, implicit-def $vl, implicit-def $vtype
+    dead %3:gpr = PseudoVSETVLI %2, 12, implicit-def $vl, implicit-def $vtype
+
+    $x1 = PseudoVSETVLI $x0, 12, implicit-def $vl, implicit-def $vtype
+    $x1 = COPY $x3
+    dead %4:gpr = PseudoVSETVLI $x1, 12, implicit-def $vl, implicit-def $vtype
+    PseudoRET
+
+...

llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll

Lines changed: 2 additions & 4 deletions
@@ -34,12 +34,10 @@ define void @test_vsetvlimax_e64m8() nounwind {
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
-; FIXME: We don't
 define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: redundant_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
@@ -49,13 +47,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)
 
 ; Check that we remove the repeated/redundant vsetvli when followed by another
 ; operation
-; FIXME: We don't
+; FIXME: We don't catch the second vsetvli because it has a use of its output.
+; We could replace it with the output of the first vsetvli.
 define <vscale x 4 x i32> @repeated_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: repeated_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)

llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll

Lines changed: 2 additions & 4 deletions
@@ -52,12 +52,10 @@ define void @test_vsetvlimax_e64m4() nounwind {
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
-; FIXME: We don't
 define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: redundant_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
@@ -67,13 +65,13 @@ define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)
 
 ; Check that we remove the repeated/redundant vsetvli when followed by another
 ; operation
-; FIXME: We don't
+; FIXME: We don't catch the second vsetvli because it has a use of its output.
+; We could replace it with the output of the first vsetvli.
 define <vscale x 4 x i32> @repeated_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
 ; CHECK-LABEL: repeated_vsetvli:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
-; CHECK-NEXT:    vsetvli a0, a0, e32,m2,ta,mu
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
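
The FIXME kept in both files describes the case this patch leaves behind: the
second vsetvli cannot simply be erased because its output register has uses.
One plausible follow-up, sketched here under the assumption that both outputs
are virtual registers (the helper name is hypothetical, not part of this
patch), is to rewire those uses to the earlier output first:

    // Hypothetical helper: erase a redundant VSETVLI whose output has uses by
    // pointing those uses at the previous VSET{I}VLI's output instead.
    static bool tryEraseRedundantVSETVLI(MachineRegisterInfo &MRI,
                                         MachineInstr &MI,
                                         MachineInstr &PrevVSETVLI) {
      Register OldVL = MI.getOperand(0).getReg();
      Register NewVL = PrevVSETVLI.getOperand(0).getReg();
      // Only virtual registers: rewiring a physical register would need a
      // clobber/liveness analysis between the two instructions.
      if (!OldVL.isVirtual() || !NewVL.isVirtual())
        return false;
      MRI.replaceRegWith(OldVL, NewVL); // all readers of OldVL now use NewVL
      MI.eraseFromParent();
      return true;
    }

If something along these lines were added, the repeated_vsetvli tests above
should collapse to a single vsetvli.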
