Skip to content

Commit 240512c

Browse files
authored
[LoongArch] Add a pass to rewrite rd to r0 for non-computational instrs whose return values are unused (#94590)
This patch adds a peephole pass, `LoongArchDeadRegisterDefinitions`, which rewrites `rd` to `r0` when `rd` is marked as dead. This may improve register allocation and reduce pipeline hazards on CPUs that lack register renaming or out-of-order (OoO) execution.
1 parent d224a03 commit 240512c

File tree

11 files changed

+246
-110
lines changed

11 files changed

+246
-110
lines changed

llvm/lib/Target/LoongArch/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ add_public_tablegen_target(LoongArchCommonTableGen)
1616

1717
add_llvm_target(LoongArchCodeGen
1818
LoongArchAsmPrinter.cpp
19+
LoongArchDeadRegisterDefinitions.cpp
1920
LoongArchExpandAtomicPseudoInsts.cpp
2021
LoongArchExpandPseudoInsts.cpp
2122
LoongArchFrameLowering.cpp

llvm/lib/Target/LoongArch/LoongArch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,14 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
3333
MCOperand &MCOp,
3434
const AsmPrinter &AP);
3535

36+
FunctionPass *createLoongArchDeadRegisterDefinitionsPass();
3637
FunctionPass *createLoongArchExpandAtomicPseudoPass();
3738
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
3839
FunctionPass *createLoongArchOptWInstrsPass();
3940
FunctionPass *createLoongArchPreRAExpandPseudoPass();
4041
FunctionPass *createLoongArchExpandPseudoPass();
4142
void initializeLoongArchDAGToDAGISelLegacyPass(PassRegistry &);
43+
void initializeLoongArchDeadRegisterDefinitionsPass(PassRegistry &);
4244
void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
4345
void initializeLoongArchOptWInstrsPass(PassRegistry &);
4446
void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
//=== LoongArchDeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg ===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===---------------------------------------------------------------------===//
8+
//
9+
// This pass rewrites rd to r0 for non-computational instructions (loads,
// stores and instructions with unmodeled side effects) whose results are unused.
10+
//
11+
//===---------------------------------------------------------------------===//
12+
13+
#include "LoongArch.h"
14+
#include "LoongArchInstrInfo.h"
15+
#include "LoongArchSubtarget.h"
16+
#include "llvm/ADT/Statistic.h"
17+
#include "llvm/CodeGen/LiveDebugVariables.h"
18+
#include "llvm/CodeGen/LiveIntervals.h"
19+
#include "llvm/CodeGen/LiveStacks.h"
20+
#include "llvm/CodeGen/MachineFunctionPass.h"
21+
#include "llvm/CodeGen/MachineRegisterInfo.h"
22+
23+
using namespace llvm;
24+
#define DEBUG_TYPE "loongarch-dead-defs"
25+
#define LoongArch_DEAD_REG_DEF_NAME "LoongArch Dead register definitions"
26+
27+
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
28+
29+
namespace {
30+
class LoongArchDeadRegisterDefinitions : public MachineFunctionPass {
31+
public:
32+
static char ID;
33+
34+
LoongArchDeadRegisterDefinitions() : MachineFunctionPass(ID) {}
35+
bool runOnMachineFunction(MachineFunction &MF) override;
36+
void getAnalysisUsage(AnalysisUsage &AU) const override {
37+
AU.setPreservesCFG();
38+
AU.addRequired<LiveIntervals>();
39+
AU.addPreserved<LiveIntervals>();
40+
AU.addRequired<LiveIntervals>();
41+
AU.addPreserved<SlotIndexes>();
42+
AU.addPreserved<LiveDebugVariables>();
43+
AU.addPreserved<LiveStacks>();
44+
MachineFunctionPass::getAnalysisUsage(AU);
45+
}
46+
47+
StringRef getPassName() const override { return LoongArch_DEAD_REG_DEF_NAME; }
48+
};
49+
} // end anonymous namespace
50+
51+
// Storage for the static pass ID that the constructor forwards to
// MachineFunctionPass.
char LoongArchDeadRegisterDefinitions::ID = 0;
52+
// Pass registration: DEBUG_TYPE ("loongarch-dead-defs") and the
// human-readable name are handed to INITIALIZE_PASS.
// NOTE(review): the trailing `false, false` are presumably the CFG-only and
// is-analysis flags — confirm against llvm/PassSupport.h.
INITIALIZE_PASS(LoongArchDeadRegisterDefinitions, DEBUG_TYPE,
53+
LoongArch_DEAD_REG_DEF_NAME, false, false)
54+
55+
// Factory for the pass (declared in LoongArch.h): returns a newly allocated
// LoongArchDeadRegisterDefinitions instance.
// NOTE(review): ownership presumably passes to the pass manager via addPass()
// — confirm.
FunctionPass *llvm::createLoongArchDeadRegisterDefinitionsPass() {
56+
return new LoongArchDeadRegisterDefinitions();
57+
}
58+
59+
/// Visits every instruction of \p MF. For instructions that may load, may
/// store, or have unmodeled side effects, each explicit def that is a dead
/// virtual register, is neither early-clobber nor tied to a use, and whose
/// operand register class contains LoongArch::R0 is rewritten to R0.
///
/// \returns true when at least one operand was rewritten.
bool LoongArchDeadRegisterDefinitions::runOnMachineFunction(
    MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const TargetInstrInfo *InstrInfo = MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  LiveIntervals &Intervals = getAnalysis<LiveIntervals>();
  LLVM_DEBUG(dbgs() << "***** LoongArchDeadRegisterDefinitions *****\n");

  bool Changed = false;
  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Instr : Block) {
      // Only non-computational instructions are candidates.
      const MCInstrDesc &Desc = Instr.getDesc();
      const bool IsNonComputational = Desc.mayLoad() || Desc.mayStore() ||
                                      Desc.hasUnmodeledSideEffects();
      if (!IsNonComputational)
        continue;

      for (int OpIdx = 0, NumDefs = Desc.getNumDefs(); OpIdx != NumDefs;
           ++OpIdx) {
        MachineOperand &DefOp = Instr.getOperand(OpIdx);
        if (!(DefOp.isReg() && DefOp.isDef() && !DefOp.isEarlyClobber()))
          continue;

        // A def tied to a use operand must keep its register.
        if (Instr.isRegTiedToUseOperand(OpIdx)) {
          LLVM_DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
          continue;
        }

        Register DefReg = DefOp.getReg();
        if (!(DefReg.isVirtual() && DefOp.isDead()))
          continue;

        LLVM_DEBUG(dbgs() << " Dead def operand #" << OpIdx << " in:\n ";
                   Instr.print(dbgs()));

        // The operand must be allowed to hold R0 at all.
        const TargetRegisterClass *OpClass =
            InstrInfo->getRegClass(Desc, OpIdx, RegInfo, MF);
        if (!OpClass || !OpClass->contains(LoongArch::R0)) {
          LLVM_DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
          continue;
        }

        // Drop the virtual register's live interval before swapping in R0.
        assert(Intervals.hasInterval(DefReg));
        Intervals.removeInterval(DefReg);
        DefOp.setReg(LoongArch::R0);
        LLVM_DEBUG(dbgs() << " Replacing with zero register. New:\n ";
                   Instr.print(dbgs()));
        ++NumDeadDefsReplaced;
        Changed = true;
      }
    }
  }

  return Changed;
}

llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,19 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
3434
RegisterTargetMachine<LoongArchTargetMachine> X(getTheLoongArch32Target());
3535
RegisterTargetMachine<LoongArchTargetMachine> Y(getTheLoongArch64Target());
3636
auto *PR = PassRegistry::getPassRegistry();
37+
initializeLoongArchDeadRegisterDefinitionsPass(*PR);
3738
initializeLoongArchOptWInstrsPass(*PR);
3839
initializeLoongArchPreRAExpandPseudoPass(*PR);
3940
initializeLoongArchDAGToDAGISelLegacyPass(*PR);
4041
}
4142

43+
// Hidden command-line switch, on by default, that the pass-config hooks
// consult before scheduling the LoongArchDeadRegisterDefinitions pass.
static cl::opt<bool> EnableLoongArchDeadRegisterElimination(
    "loongarch-enable-dead-defs", cl::Hidden,
    cl::desc("Enable the pass that removes dead"
             " definitions and replaces stores to"
             " them with stores to r0"),
    cl::init(true));
49+
4250
static cl::opt<bool>
4351
EnableLoopDataPrefetch("loongarch-enable-loop-data-prefetch", cl::Hidden,
4452
cl::desc("Enable the loop data prefetch pass"),
@@ -148,6 +156,8 @@ class LoongArchPassConfig : public TargetPassConfig {
148156
void addPreEmitPass2() override;
149157
void addMachineSSAOptimization() override;
150158
void addPreRegAlloc() override;
159+
bool addRegAssignAndRewriteFast() override;
160+
bool addRegAssignAndRewriteOptimized() override;
151161
};
152162
} // end namespace
153163

@@ -200,3 +210,17 @@ void LoongArchPassConfig::addMachineSSAOptimization() {
200210
// Adds the pass created by createLoongArchPreRAExpandPseudoPass() to the
// pipeline.
// NOTE(review): presumably invoked by TargetPassConfig ahead of register
// allocation, per the hook name — confirm.
void LoongArchPassConfig::addPreRegAlloc() {
201211
addPass(createLoongArchPreRAExpandPseudoPass());
202212
}
213+
214+
bool LoongArchPassConfig::addRegAssignAndRewriteFast() {
215+
if (TM->getOptLevel() != CodeGenOptLevel::None &&
216+
EnableLoongArchDeadRegisterElimination)
217+
addPass(createLoongArchDeadRegisterDefinitionsPass());
218+
return TargetPassConfig::addRegAssignAndRewriteFast();
219+
}
220+
221+
bool LoongArchPassConfig::addRegAssignAndRewriteOptimized() {
222+
if (TM->getOptLevel() != CodeGenOptLevel::None &&
223+
EnableLoongArchDeadRegisterElimination)
224+
addPass(createLoongArchDeadRegisterDefinitionsPass());
225+
return TargetPassConfig::addRegAssignAndRewriteOptimized();
226+
}

llvm/test/CodeGen/LoongArch/global-address.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,40 +14,40 @@ define void @foo() nounwind {
1414
; LA32NOPIC: # %bb.0:
1515
; LA32NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
1616
; LA32NOPIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G)
17-
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
17+
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
1818
; LA32NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
1919
; LA32NOPIC-NEXT: addi.w $a0, $a0, %pc_lo12(g)
20-
; LA32NOPIC-NEXT: ld.w $a0, $a0, 0
20+
; LA32NOPIC-NEXT: ld.w $zero, $a0, 0
2121
; LA32NOPIC-NEXT: ret
2222
;
2323
; LA32PIC-LABEL: foo:
2424
; LA32PIC: # %bb.0:
2525
; LA32PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
2626
; LA32PIC-NEXT: ld.w $a0, $a0, %got_pc_lo12(G)
27-
; LA32PIC-NEXT: ld.w $a0, $a0, 0
27+
; LA32PIC-NEXT: ld.w $zero, $a0, 0
2828
; LA32PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
2929
; LA32PIC-NEXT: addi.w $a0, $a0, %pc_lo12(.Lg$local)
30-
; LA32PIC-NEXT: ld.w $a0, $a0, 0
30+
; LA32PIC-NEXT: ld.w $zero, $a0, 0
3131
; LA32PIC-NEXT: ret
3232
;
3333
; LA64NOPIC-LABEL: foo:
3434
; LA64NOPIC: # %bb.0:
3535
; LA64NOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
3636
; LA64NOPIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
37-
; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
37+
; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
3838
; LA64NOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
3939
; LA64NOPIC-NEXT: addi.d $a0, $a0, %pc_lo12(g)
40-
; LA64NOPIC-NEXT: ld.w $a0, $a0, 0
40+
; LA64NOPIC-NEXT: ld.w $zero, $a0, 0
4141
; LA64NOPIC-NEXT: ret
4242
;
4343
; LA64PIC-LABEL: foo:
4444
; LA64PIC: # %bb.0:
4545
; LA64PIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
4646
; LA64PIC-NEXT: ld.d $a0, $a0, %got_pc_lo12(G)
47-
; LA64PIC-NEXT: ld.w $a0, $a0, 0
47+
; LA64PIC-NEXT: ld.w $zero, $a0, 0
4848
; LA64PIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
4949
; LA64PIC-NEXT: addi.d $a0, $a0, %pc_lo12(.Lg$local)
50-
; LA64PIC-NEXT: ld.w $a0, $a0, 0
50+
; LA64PIC-NEXT: ld.w $zero, $a0, 0
5151
; LA64PIC-NEXT: ret
5252
;
5353
; LA64LARGENOPIC-LABEL: foo:
@@ -57,13 +57,13 @@ define void @foo() nounwind {
5757
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
5858
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
5959
; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0
60-
; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0
60+
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
6161
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
6262
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g)
6363
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g)
6464
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
6565
; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0
66-
; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0
66+
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
6767
; LA64LARGENOPIC-NEXT: ret
6868
;
6969
; LA64LARGEPIC-LABEL: foo:
@@ -73,13 +73,13 @@ define void @foo() nounwind {
7373
; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
7474
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
7575
; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0
76-
; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0
76+
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
7777
; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
7878
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local)
7979
; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local)
8080
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
8181
; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0
82-
; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0
82+
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
8383
; LA64LARGEPIC-NEXT: ret
8484
%V = load volatile i32, ptr @G
8585
%v = load volatile i32, ptr @g

llvm/test/CodeGen/LoongArch/intrinsic-la64.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ entry:
178178
define void @csrrd_d_noret() {
179179
; CHECK-LABEL: csrrd_d_noret:
180180
; CHECK: # %bb.0: # %entry
181-
; CHECK-NEXT: csrrd $a0, 1
181+
; CHECK-NEXT: csrrd $zero, 1
182182
; CHECK-NEXT: ret
183183
entry:
184184
%0 = tail call i64 @llvm.loongarch.csrrd.d(i32 1)
@@ -240,7 +240,7 @@ entry:
240240
define void @iocsrrd_d_noret(i32 %a) {
241241
; CHECK-LABEL: iocsrrd_d_noret:
242242
; CHECK: # %bb.0: # %entry
243-
; CHECK-NEXT: iocsrrd.d $a0, $a0
243+
; CHECK-NEXT: iocsrrd.d $zero, $a0
244244
; CHECK-NEXT: ret
245245
entry:
246246
%0 = tail call i64 @llvm.loongarch.iocsrrd.d(i32 %a)
@@ -290,7 +290,7 @@ entry:
290290
define void @lddir_d_noret(i64 %a) {
291291
; CHECK-LABEL: lddir_d_noret:
292292
; CHECK: # %bb.0: # %entry
293-
; CHECK-NEXT: lddir $a0, $a0, 1
293+
; CHECK-NEXT: lddir $zero, $a0, 1
294294
; CHECK-NEXT: ret
295295
entry:
296296
%0 = tail call i64 @llvm.loongarch.lddir.d(i64 %a, i64 1)

llvm/test/CodeGen/LoongArch/intrinsic.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ entry:
7373
define void @movfcsr2gr_noret() nounwind {
7474
; CHECK-LABEL: movfcsr2gr_noret:
7575
; CHECK: # %bb.0: # %entry
76-
; CHECK-NEXT: movfcsr2gr $a0, $fcsr1
76+
; CHECK-NEXT: movfcsr2gr $zero, $fcsr1
7777
; CHECK-NEXT: ret
7878
entry:
7979
%res = call i32 @llvm.loongarch.movfcsr2gr(i32 1)
@@ -103,7 +103,7 @@ entry:
103103
define void @csrrd_w_noret() {
104104
; CHECK-LABEL: csrrd_w_noret:
105105
; CHECK: # %bb.0: # %entry
106-
; CHECK-NEXT: csrrd $a0, 1
106+
; CHECK-NEXT: csrrd $zero, 1
107107
; CHECK-NEXT: ret
108108
entry:
109109
%0 = tail call i32 @llvm.loongarch.csrrd.w(i32 1)
@@ -185,7 +185,7 @@ entry:
185185
define void @iocsrrd_b_noret(i32 %a) {
186186
; CHECK-LABEL: iocsrrd_b_noret:
187187
; CHECK: # %bb.0: # %entry
188-
; CHECK-NEXT: iocsrrd.b $a0, $a0
188+
; CHECK-NEXT: iocsrrd.b $zero, $a0
189189
; CHECK-NEXT: ret
190190
entry:
191191
%0 = tail call i32 @llvm.loongarch.iocsrrd.b(i32 %a)
@@ -195,7 +195,7 @@ entry:
195195
define void @iocsrrd_h_noret(i32 %a) {
196196
; CHECK-LABEL: iocsrrd_h_noret:
197197
; CHECK: # %bb.0: # %entry
198-
; CHECK-NEXT: iocsrrd.h $a0, $a0
198+
; CHECK-NEXT: iocsrrd.h $zero, $a0
199199
; CHECK-NEXT: ret
200200
entry:
201201
%0 = tail call i32 @llvm.loongarch.iocsrrd.h(i32 %a)
@@ -205,7 +205,7 @@ entry:
205205
define void @iocsrrd_w_noret(i32 %a) {
206206
; CHECK-LABEL: iocsrrd_w_noret:
207207
; CHECK: # %bb.0: # %entry
208-
; CHECK-NEXT: iocsrrd.w $a0, $a0
208+
; CHECK-NEXT: iocsrrd.w $zero, $a0
209209
; CHECK-NEXT: ret
210210
entry:
211211
%0 = tail call i32 @llvm.loongarch.iocsrrd.w(i32 %a)

0 commit comments

Comments
 (0)