Skip to content

Commit 2c782ab

Browse files
authored
[RISCV] Add software pipeliner support (#117546)
This patch adds basic support for `MachinePipeliner` and disables it by default. The functionality should be OK, and all llvm-test-suite tests have passed.
1 parent 2302142 commit 2c782ab

File tree

6 files changed

+209
-0
lines changed

6 files changed

+209
-0
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4248,3 +4248,84 @@ bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
42484248
return false;
42494249
return LHS.getImm() <= RHS.getImm();
42504250
}
4251+
4252+
namespace {
4253+
class RISCVPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
4254+
const MachineInstr *LHS;
4255+
const MachineInstr *RHS;
4256+
SmallVector<MachineOperand, 3> Cond;
4257+
4258+
public:
4259+
RISCVPipelinerLoopInfo(const MachineInstr *LHS, const MachineInstr *RHS,
4260+
const SmallVectorImpl<MachineOperand> &Cond)
4261+
: LHS(LHS), RHS(RHS), Cond(Cond.begin(), Cond.end()) {}
4262+
4263+
bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
4264+
// Make the instructions for loop control be placed in stage 0.
4265+
// The predecessors of LHS/RHS are considered by the caller.
4266+
if (LHS && MI == LHS)
4267+
return true;
4268+
if (RHS && MI == RHS)
4269+
return true;
4270+
return false;
4271+
}
4272+
4273+
std::optional<bool> createTripCountGreaterCondition(
4274+
int TC, MachineBasicBlock &MBB,
4275+
SmallVectorImpl<MachineOperand> &CondParam) override {
4276+
// A branch instruction will be inserted as "if (Cond) goto epilogue".
4277+
// Cond is normalized for such use.
4278+
// The predecessors of the branch are assumed to have already been inserted.
4279+
CondParam = Cond;
4280+
return {};
4281+
}
4282+
4283+
void setPreheader(MachineBasicBlock *NewPreheader) override {}
4284+
4285+
void adjustTripCount(int TripCountAdjust) override {}
4286+
4287+
void disposed() override {}
4288+
};
4289+
} // namespace
4290+
4291+
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
4292+
RISCVInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
4293+
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
4294+
SmallVector<MachineOperand, 4> Cond;
4295+
if (analyzeBranch(*LoopBB, TBB, FBB, Cond, /*AllowModify=*/false))
4296+
return nullptr;
4297+
4298+
// Infinite loops are not supported
4299+
if (TBB == LoopBB && FBB == LoopBB)
4300+
return nullptr;
4301+
4302+
// Must be conditional branch
4303+
if (FBB == nullptr)
4304+
return nullptr;
4305+
4306+
assert((TBB == LoopBB || FBB == LoopBB) &&
4307+
"The Loop must be a single-basic-block loop");
4308+
4309+
// Normalization for createTripCountGreaterCondition()
4310+
if (TBB == LoopBB)
4311+
reverseBranchCondition(Cond);
4312+
4313+
const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
4314+
auto FindRegDef = [&MRI](MachineOperand &Op) -> const MachineInstr * {
4315+
if (!Op.isReg())
4316+
return nullptr;
4317+
Register Reg = Op.getReg();
4318+
if (!Reg.isVirtual())
4319+
return nullptr;
4320+
return MRI.getVRegDef(Reg);
4321+
};
4322+
4323+
const MachineInstr *LHS = FindRegDef(Cond[1]);
4324+
const MachineInstr *RHS = FindRegDef(Cond[2]);
4325+
if (LHS && LHS->isPHI())
4326+
return nullptr;
4327+
if (RHS && RHS->isPHI())
4328+
return nullptr;
4329+
4330+
return std::make_unique<RISCVPipelinerLoopInfo>(LHS, RHS, Cond);
4331+
}

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
298298

299299
unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
300300

301+
/// Analyze the loop \p LoopBB for software pipelining. Returns the loop
/// description consumed by the pipeliner, or nullptr when the loop is
/// rejected (see the definition in RISCVInstrInfo.cpp for the conditions).
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
302+
analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
303+
301304
protected:
302305
const RISCVSubtarget &STI;
303306

llvm/lib/Target/RISCV/RISCVSubtarget.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
194194

195195
bool RISCVSubtarget::enableSubRegLiveness() const { return true; }
196196

197+
bool RISCVSubtarget::enableMachinePipeliner() const {
198+
return getSchedModel().hasInstrSchedModel();
199+
}
200+
197201
/// Enable use of alias analysis during code generation (during MI
198202
/// scheduling, DAGCombine, etc.).
199203
bool RISCVSubtarget::useAA() const { return UseAA; }

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
324324

325325
bool enableSubRegLiveness() const override;
326326

327+
bool enableMachinePipeliner() const override;
328+
329+
/// Always returns false.
/// NOTE(review): presumably tells the software pipeliner not to use a DFA for
/// resource modeling — confirm against the TargetSubtargetInfo documentation.
bool useDFAforSMS() const override {
  return false;
}
330+
327331
bool useAA() const override;
328332

329333
unsigned getCacheLineSize() const override {

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ static cl::opt<bool> DisableVectorMaskMutation(
112112
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
113113
cl::Hidden);
114114

115+
// Hidden command-line switch `-riscv-enable-pipeliner`; initialized to false,
// so the MachinePipeliner pass is only added when the flag is given.
static cl::opt<bool>
116+
EnableMachinePipeliner("riscv-enable-pipeliner",
117+
cl::desc("Enable Machine Pipeliner for RISC-V"),
118+
cl::init(false), cl::Hidden);
119+
115120
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
116121
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
117122
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -603,6 +608,9 @@ void RISCVPassConfig::addPreRegAlloc() {
603608
addPass(createRISCVInsertReadWriteCSRPass());
604609
addPass(createRISCVInsertWriteVXRMPass());
605610
addPass(createRISCVLandingPadSetupPass());
611+
612+
if (TM->getOptLevel() != CodeGenOptLevel::None && EnableMachinePipeliner)
613+
addPass(&MachinePipelinerID);
606614
}
607615

608616
void RISCVPassConfig::addFastRegAlloc() {
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=false < %s \
3+
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-NOT-PIPELINED
4+
; RUN: llc -mtriple=riscv64 -mcpu=sifive-p670 -O3 -verify-machineinstrs -riscv-enable-pipeliner=true < %s \
5+
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-PIPELINED
6+
7+
; We shouldn't pipeline this loop because one operand of the branch is a PHI.
8+
define i32 @test_phi() {
9+
; CHECK-LABEL: test_phi:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: li a0, 0
12+
; CHECK-NEXT: .LBB0_1: # %for.body
13+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
14+
; CHECK-NEXT: mv a1, a0
15+
; CHECK-NEXT: li a0, 1
16+
; CHECK-NEXT: sh a0, 0(zero)
17+
; CHECK-NEXT: bnez a1, .LBB0_1
18+
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
19+
; CHECK-NEXT: li a0, 0
20+
; CHECK-NEXT: ret
21+
entry:
22+
br label %for.body
23+
24+
for.cond.cleanup: ; preds = %for.body
25+
ret i32 0
26+
27+
for.body: ; preds = %for.body, %entry
28+
%indvars.iv1 = phi i64 [ 0, %entry ], [ 1, %for.body ]
29+
store i16 1, ptr null, align 4
30+
%exitcond.not.31 = icmp eq i64 %indvars.iv1, 0
31+
br i1 %exitcond.not.31, label %for.cond.cleanup, label %for.body
32+
}
33+
34+
define void @test_pipelined_1(ptr noalias %in, ptr noalias %out, i32 signext %cnt) {
35+
; CHECK-NOT-PIPELINED-LABEL: test_pipelined_1:
36+
; CHECK-NOT-PIPELINED: # %bb.0: # %entry
37+
; CHECK-NOT-PIPELINED-NEXT: blez a2, .LBB1_3
38+
; CHECK-NOT-PIPELINED-NEXT: # %bb.1: # %for.body.preheader
39+
; CHECK-NOT-PIPELINED-NEXT: addi a2, a2, -1
40+
; CHECK-NOT-PIPELINED-NEXT: sh2add.uw a2, a2, a1
41+
; CHECK-NOT-PIPELINED-NEXT: addi a2, a2, 4
42+
; CHECK-NOT-PIPELINED-NEXT: .LBB1_2: # %for.body
43+
; CHECK-NOT-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
44+
; CHECK-NOT-PIPELINED-NEXT: lw a3, 0(a1)
45+
; CHECK-NOT-PIPELINED-NEXT: addi a1, a1, 4
46+
; CHECK-NOT-PIPELINED-NEXT: addi a3, a3, 1
47+
; CHECK-NOT-PIPELINED-NEXT: sw a3, 0(a0)
48+
; CHECK-NOT-PIPELINED-NEXT: addi a0, a0, 4
49+
; CHECK-NOT-PIPELINED-NEXT: bne a1, a2, .LBB1_2
50+
; CHECK-NOT-PIPELINED-NEXT: .LBB1_3: # %for.end
51+
; CHECK-NOT-PIPELINED-NEXT: ret
52+
;
53+
; CHECK-PIPELINED-LABEL: test_pipelined_1:
54+
; CHECK-PIPELINED: # %bb.0: # %entry
55+
; CHECK-PIPELINED-NEXT: blez a2, .LBB1_6
56+
; CHECK-PIPELINED-NEXT: # %bb.1: # %for.body.preheader
57+
; CHECK-PIPELINED-NEXT: lw a4, 0(a1)
58+
; CHECK-PIPELINED-NEXT: addi a2, a2, -1
59+
; CHECK-PIPELINED-NEXT: sh2add.uw a6, a2, a1
60+
; CHECK-PIPELINED-NEXT: addi a2, a0, 4
61+
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
62+
; CHECK-PIPELINED-NEXT: addi a6, a6, 4
63+
; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_5
64+
; CHECK-PIPELINED-NEXT: # %bb.2: # %for.body
65+
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
66+
; CHECK-PIPELINED-NEXT: addi a3, a2, 4
67+
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
68+
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
69+
; CHECK-PIPELINED-NEXT: beq a1, a6, .LBB1_4
70+
; CHECK-PIPELINED-NEXT: .LBB1_3: # %for.body
71+
; CHECK-PIPELINED-NEXT: # =>This Inner Loop Header: Depth=1
72+
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
73+
; CHECK-PIPELINED-NEXT: mv a4, a5
74+
; CHECK-PIPELINED-NEXT: lw a5, 0(a1)
75+
; CHECK-PIPELINED-NEXT: mv a0, a2
76+
; CHECK-PIPELINED-NEXT: mv a2, a3
77+
; CHECK-PIPELINED-NEXT: addi a3, a3, 4
78+
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
79+
; CHECK-PIPELINED-NEXT: addi a1, a1, 4
80+
; CHECK-PIPELINED-NEXT: bne a1, a6, .LBB1_3
81+
; CHECK-PIPELINED-NEXT: .LBB1_4:
82+
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
83+
; CHECK-PIPELINED-NEXT: mv a0, a2
84+
; CHECK-PIPELINED-NEXT: mv a4, a5
85+
; CHECK-PIPELINED-NEXT: .LBB1_5:
86+
; CHECK-PIPELINED-NEXT: addi a4, a4, 1
87+
; CHECK-PIPELINED-NEXT: sw a4, 0(a0)
88+
; CHECK-PIPELINED-NEXT: .LBB1_6: # %for.end
89+
; CHECK-PIPELINED-NEXT: ret
90+
entry:
91+
%cmp = icmp sgt i32 %cnt, 0
92+
br i1 %cmp, label %for.body, label %for.end
93+
94+
for.body: ; preds = %entry, %for.body
95+
%inc.next = phi i32 [ %inc, %for.body ], [ 0, %entry ]
96+
%in.addr.next = phi ptr [ %incdec.in, %for.body ], [ %in, %entry ]
97+
%out.addr.next = phi ptr [ %incdec.out, %for.body ], [ %out, %entry ]
98+
%0 = load i32, ptr %out.addr.next, align 4
99+
%1 = add i32 %0, 1
100+
store i32 %1, ptr %in.addr.next, align 4
101+
%incdec.in = getelementptr inbounds i8, ptr %in.addr.next, i64 4
102+
%incdec.out = getelementptr inbounds i8, ptr %out.addr.next, i64 4
103+
%inc = add nuw nsw i32 %inc.next, 1
104+
%exitcond.not = icmp eq i32 %inc, %cnt
105+
br i1 %exitcond.not, label %for.end, label %for.body
106+
107+
for.end: ; preds = %for.body, %entry
108+
ret void
109+
}

0 commit comments

Comments
 (0)