Skip to content

Commit 2906232

Browse files
authored
[RISCV] Implement tail call optimization in machine outliner (#115297)
Following up on issue #89822, this patch adds the ability to use tail calls in the machine outliner pass. It also enables outlining of patterns that use the X5 (T0) register.
1 parent 93caee1 commit 2906232

10 files changed

+298
-79
lines changed

llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,12 @@ static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
208208
return Desc.getNumOperands() - Offset;
209209
}
210210

211+
// Returns the register number used when expanding PseudoTAIL, selected from
// the subtarget feature bits: X7 when Zicfilp is enabled, X6 otherwise.
static inline unsigned getTailExpandUseRegNo(const FeatureBitset &FeatureBits) {
212+
// For Zicfilp, PseudoTAIL should be expanded to a software guarded branch.
213+
// It means to use t2(x7) as rs1 of JALR to expand PseudoTAIL.
214+
return FeatureBits[RISCV::FeatureStdExtZicfilp] ? RISCV::X7 : RISCV::X6;
215+
}
216+
211217
static inline unsigned getSEWOpNum(const MCInstrDesc &Desc) {
212218
const uint64_t TSFlags = Desc.TSFlags;
213219
assert(hasSEWOp(TSFlags));

llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,11 +124,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
124124
MCRegister Ra;
125125
if (MI.getOpcode() == RISCV::PseudoTAIL) {
126126
Func = MI.getOperand(0);
127-
Ra = RISCV::X6;
128-
// For Zicfilp, PseudoTAIL should be expanded to a software guarded branch.
129-
// It means to use t2(x7) as rs1 of JALR to expand PseudoTAIL.
130-
if (STI.hasFeature(RISCV::FeatureStdExtZicfilp))
131-
Ra = RISCV::X7;
127+
Ra = RISCVII::getTailExpandUseRegNo(STI.getFeatureBits());
132128
} else if (MI.getOpcode() == RISCV::PseudoCALLReg) {
133129
Func = MI.getOperand(1);
134130
Ra = MI.getOperand(0).getReg();

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 110 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "RISCVInstrInfo.h"
14+
#include "MCTargetDesc/RISCVBaseInfo.h"
1415
#include "MCTargetDesc/RISCVMatInt.h"
1516
#include "RISCV.h"
1617
#include "RISCVMachineFunctionInfo.h"
@@ -2927,6 +2928,7 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
29272928

29282929
// Enum values indicating how an outlined call should be constructed.
29292930
enum MachineOutlinerConstructionID {
2931+
// The call site is replaced by a PseudoTAIL to the outlined function and no
// extra return is appended to the outlined frame.
MachineOutlinerTailCall,
29302932
// The call site uses PseudoCALLReg with X5 as the link register, and the
// outlined frame gets a return instruction appended.
MachineOutlinerDefault
29312933
};
29322934

@@ -2935,46 +2937,118 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
29352937
return MF.getFunction().hasMinSize();
29362938
}
29372939

2940+
// Returns true if the function containing MBB is marked as patchable, i.e.
// its "fentry-call" attribute evaluates to true or it carries a
// "patchable-function-entry" attribute.
static bool isCandidatePatchable(const MachineBasicBlock &MBB) {
2941+
const MachineFunction *MF = MBB.getParent();
2942+
const Function &F = MF->getFunction();
2943+
return F.getFnAttribute("fentry-call").getValueAsBool() ||
2944+
F.hasFnAttribute("patchable-function-entry");
2945+
}
2946+
2947+
// Returns true if MI reads RegNo, either according to MI.readsRegister() or
// because the instruction description lists RegNo as an implicit use.
static bool isMIReadsReg(const MachineInstr &MI, const TargetRegisterInfo *TRI,
2948+
unsigned RegNo) {
2949+
return MI.readsRegister(RegNo, TRI) ||
2950+
MI.getDesc().hasImplicitUseOfPhysReg(RegNo);
2951+
}
2952+
2953+
// Returns true if MI modifies RegNo, either according to
// MI.modifiesRegister() or because the instruction description lists RegNo as
// an implicit def.
static bool isMIModifiesReg(const MachineInstr &MI,
2954+
const TargetRegisterInfo *TRI, unsigned RegNo) {
2955+
return MI.modifiesRegister(RegNo, TRI) ||
2956+
MI.getDesc().hasImplicitDefOfPhysReg(RegNo);
2957+
}
2958+
2959+
// Returns true if a tail call to an outlined function must not be inserted
// into MBB. That is the case when:
//   - the last instruction of MBB is not a return,
//   - the enclosing function is patchable (see isCandidatePatchable), or
//   - some instruction reads the register used to expand PseudoTAIL before
//     any instruction in MBB modifies it.
// NOTE(review): MBB.back() is called unconditionally, so this presumably
// expects a non-empty block -- confirm against callers.
static bool cannotInsertTailCall(const MachineBasicBlock &MBB) {
2960+
if (!MBB.back().isReturn())
2961+
return true;
2962+
if (isCandidatePatchable(MBB))
2963+
return true;
2964+
2965+
// If the candidate reads the pre-set register
2966+
// that can be used for expanding PseudoTAIL instruction,
2967+
// then we cannot insert tail call.
2968+
const TargetSubtargetInfo &STI = MBB.getParent()->getSubtarget();
2969+
unsigned TailExpandUseRegNo =
2970+
RISCVII::getTailExpandUseRegNo(STI.getFeatureBits());
2971+
// Walk the whole block from its first instruction: a read seen before any
// write blocks the tail call; the first write ends the scan.
for (const MachineInstr &MI : MBB) {
2972+
if (isMIReadsReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
2973+
return true;
2974+
if (isMIModifiesReg(MI, STI.getRegisterInfo(), TailExpandUseRegNo))
2975+
break;
2976+
}
2977+
return false;
2978+
}
2979+
2980+
// Decides how the outlined call for candidate C should be constructed.
// Returns:
//   - MachineOutlinerTailCall if the candidate ends with a return,
//   - MachineOutlinerDefault if no instruction in the candidate modifies X5
//     and X5 is available across and out of the candidate sequence,
//   - std::nullopt otherwise (the candidate cannot be outlined).
static std::optional<MachineOutlinerConstructionID>
2981+
analyzeCandidate(outliner::Candidate &C) {
2982+
// If last instruction is return then we can rely on
2983+
// the verification already performed in the getOutliningTypeImpl.
2984+
if (C.back().isReturn()) {
2985+
assert(!cannotInsertTailCall(*C.getMBB()) &&
2986+
"The candidate who uses return instruction must be outlined "
2987+
"using tail call");
2988+
return MachineOutlinerTailCall;
2989+
}
2990+
2991+
// True when X5 cannot serve as the link register for this candidate: either
// an instruction in the candidate modifies X5, or X5 is not available across
// and out of the sequence.
auto CandidateUsesX5 = [](outliner::Candidate &C) {
2992+
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2993+
if (std::any_of(C.begin(), C.end(), [TRI](const MachineInstr &MI) {
2994+
return isMIModifiesReg(MI, TRI, RISCV::X5);
2995+
}))
2996+
return true;
2997+
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2998+
};
2999+
3000+
if (!CandidateUsesX5(C))
3001+
return MachineOutlinerDefault;
3002+
3003+
return std::nullopt;
3004+
}
3005+
29383006
std::optional<std::unique_ptr<outliner::OutlinedFunction>>
29393007
RISCVInstrInfo::getOutliningCandidateInfo(
29403008
const MachineModuleInfo &MMI,
29413009
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
29423010
unsigned MinRepeats) const {
29433011

2944-
// First we need to filter out candidates where the X5 register (IE t0) can't
2945-
// be used to setup the function call.
2946-
auto CannotInsertCall = [](outliner::Candidate &C) {
2947-
const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
2948-
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
2949-
};
2950-
2951-
llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);
3012+
// Each RepeatedSequenceLoc is identical.
3013+
outliner::Candidate &Candidate = RepeatedSequenceLocs[0];
3014+
auto CandidateInfo = analyzeCandidate(Candidate);
3015+
if (!CandidateInfo)
3016+
RepeatedSequenceLocs.clear();
29523017

29533018
// If the sequence doesn't have enough candidates left, then we're done.
29543019
if (RepeatedSequenceLocs.size() < MinRepeats)
29553020
return std::nullopt;
29563021

2957-
unsigned SequenceSize = 0;
2958-
2959-
for (auto &MI : RepeatedSequenceLocs[0])
2960-
SequenceSize += getInstSizeInBytes(MI);
3022+
unsigned InstrSizeCExt =
3023+
Candidate.getMF()->getSubtarget<RISCVSubtarget>().hasStdExtCOrZca() ? 2
3024+
: 4;
3025+
unsigned CallOverhead = 0, FrameOverhead = 0;
3026+
3027+
MachineOutlinerConstructionID MOCI = CandidateInfo.value();
3028+
switch (MOCI) {
3029+
case MachineOutlinerDefault:
3030+
// call t0, function = 8 bytes.
3031+
CallOverhead = 8;
3032+
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
3033+
FrameOverhead = InstrSizeCExt;
3034+
break;
3035+
case MachineOutlinerTailCall:
3036+
// tail call = auipc + jalr in the worst case without linker relaxation.
3037+
CallOverhead = 4 + InstrSizeCExt;
3038+
// Using tail call we move ret instruction from caller to callee.
3039+
FrameOverhead = 0;
3040+
break;
3041+
}
29613042

2962-
// call t0, function = 8 bytes.
2963-
unsigned CallOverhead = 8;
29643043
for (auto &C : RepeatedSequenceLocs)
2965-
C.setCallInfo(MachineOutlinerDefault, CallOverhead);
3044+
C.setCallInfo(MOCI, CallOverhead);
29663045

2967-
// jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled.
2968-
unsigned FrameOverhead = 4;
2969-
if (RepeatedSequenceLocs[0]
2970-
.getMF()
2971-
->getSubtarget<RISCVSubtarget>()
2972-
.hasStdExtCOrZca())
2973-
FrameOverhead = 2;
3046+
unsigned SequenceSize = 0;
3047+
for (auto &MI : Candidate)
3048+
SequenceSize += getInstSizeInBytes(MI);
29743049

29753050
return std::make_unique<outliner::OutlinedFunction>(
2976-
RepeatedSequenceLocs, SequenceSize, FrameOverhead,
2977-
MachineOutlinerDefault);
3051+
RepeatedSequenceLocs, SequenceSize, FrameOverhead, MOCI);
29783052
}
29793053

29803054
outliner::InstrType
@@ -2995,15 +3069,8 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
29953069
return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
29963070
: outliner::InstrType::Invisible;
29973071

2998-
// We need support for tail calls to outlined functions before return
2999-
// statements can be allowed.
3000-
if (MI.isReturn())
3001-
return outliner::InstrType::Illegal;
3002-
3003-
// Don't allow modifying the X5 register which we use for return addresses for
3004-
// these outlined functions.
3005-
if (MI.modifiesRegister(RISCV::X5, TRI) ||
3006-
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
3072+
if (cannotInsertTailCall(*MBB) &&
3073+
(MI.isReturn() || isMIModifiesReg(MI, TRI, RISCV::X5)))
30073074
return outliner::InstrType::Illegal;
30083075

30093076
// Make sure the operands don't reference something unsafe.
@@ -3039,6 +3106,9 @@ void RISCVInstrInfo::buildOutlinedFrame(
30393106
}
30403107
}
30413108

3109+
if (OF.FrameConstructionID == MachineOutlinerTailCall)
3110+
return;
3111+
30423112
MBB.addLiveIn(RISCV::X5);
30433113

30443114
// Add in a return instruction to the end of the outlined frame.
@@ -3052,6 +3122,13 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
30523122
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
30533123
MachineFunction &MF, outliner::Candidate &C) const {
30543124

3125+
if (C.CallConstructionID == MachineOutlinerTailCall) {
3126+
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
3127+
.addGlobalAddress(M.getNamedValue(MF.getName()),
3128+
/*Offset=*/0, RISCVII::MO_CALL));
3129+
return It;
3130+
}
3131+
30553132
// Add in a call instruction to the outlined function at the given location.
30563133
It = MBB.insert(It,
30573134
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; RUN: llc < %s -verify-machineinstrs -enable-machine-outliner | FileCheck %s
2+
3+
target triple = "riscv64-unknown-linux-gnu"
4+
5+
declare void @foo(i32, i32, i32, i32) minsize
6+
7+
define void @fentry0(i1 %a) nounwind {
8+
; CHECK-LABEL: fentry0:
9+
; CHECK: # %bb.1:
10+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
11+
; CHECK-NEXT: call foo
12+
; CHECK-LABEL: .LBB0_2:
13+
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
14+
entry:
15+
br i1 %a, label %if.then, label %if.end
16+
if.then:
17+
call void @foo(i32 1, i32 2, i32 3, i32 4)
18+
br label %if.end
19+
if.end:
20+
call void @foo(i32 5, i32 6, i32 7, i32 8)
21+
ret void
22+
}
23+
24+
define void @fentry1(i1 %a) nounwind {
25+
; CHECK-LABEL: fentry1:
26+
; CHECK: # %bb.1:
27+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
28+
; CHECK-NEXT: call foo
29+
; CHECK-LABEL: .LBB1_2:
30+
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
31+
entry:
32+
br i1 %a, label %if.then, label %if.end
33+
if.then:
34+
call void @foo(i32 1, i32 2, i32 3, i32 4)
35+
br label %if.end
36+
if.end:
37+
call void @foo(i32 5, i32 6, i32 7, i32 8)
38+
ret void
39+
}
40+
41+
define void @fentry2(i1 %a) nounwind {
42+
; CHECK-LABEL: fentry2:
43+
; CHECK: # %bb.1:
44+
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
45+
; CHECK-NEXT: call foo
46+
; CHECK-LABEL: .LBB2_2:
47+
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
48+
entry:
49+
br i1 %a, label %if.then, label %if.end
50+
if.then:
51+
call void @foo(i32 1, i32 2, i32 3, i32 4)
52+
br label %if.end
53+
if.end:
54+
call void @foo(i32 5, i32 6, i32 7, i32 8)
55+
ret void
56+
}
57+
58+
; CHECK: OUTLINED_FUNCTION_[[BB2]]:
59+
; CHECK: li a0, 5
60+
; CHECK-NEXT: li a1, 6
61+
; CHECK-NEXT: li a2, 7
62+
; CHECK-NEXT: li a3, 8
63+
; CHECK-NEXT: call foo
64+
65+
; CHECK: OUTLINED_FUNCTION_[[BB1]]:
66+
; CHECK: li a0, 1
67+
; CHECK-NEXT: li a1, 2
68+
; CHECK-NEXT: li a2, 3
69+
; CHECK-NEXT: li a3, 4
70+
; CHECK-NEXT: jr t0

llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,11 @@ body: |
2222
; RV32I-MO-LABEL: name: func1
2323
; RV32I-MO: liveins: $x10, $x11
2424
; RV32I-MO-NEXT: {{ $}}
25-
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
26-
; RV32I-MO-NEXT: PseudoRET
25+
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
2726
; RV64I-MO-LABEL: name: func1
2827
; RV64I-MO: liveins: $x10, $x11
2928
; RV64I-MO-NEXT: {{ $}}
30-
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
31-
; RV64I-MO-NEXT: PseudoRET
29+
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
3230
$x10 = ORI $x10, 1023
3331
CFI_INSTRUCTION offset $x1, 0
3432
$x11 = ORI $x11, 1023
@@ -49,13 +47,11 @@ body: |
4947
; RV32I-MO-LABEL: name: func2
5048
; RV32I-MO: liveins: $x10, $x11
5149
; RV32I-MO-NEXT: {{ $}}
52-
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
53-
; RV32I-MO-NEXT: PseudoRET
50+
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
5451
; RV64I-MO-LABEL: name: func2
5552
; RV64I-MO: liveins: $x10, $x11
5653
; RV64I-MO-NEXT: {{ $}}
57-
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
58-
; RV64I-MO-NEXT: PseudoRET
54+
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
5955
$x10 = ORI $x10, 1023
6056
CFI_INSTRUCTION offset $x1, 0
6157
$x11 = ORI $x11, 1023
@@ -76,13 +72,11 @@ body: |
7672
; RV32I-MO-LABEL: name: func3
7773
; RV32I-MO: liveins: $x10, $x11
7874
; RV32I-MO-NEXT: {{ $}}
79-
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
80-
; RV32I-MO-NEXT: PseudoRET
75+
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
8176
; RV64I-MO-LABEL: name: func3
8277
; RV64I-MO: liveins: $x10, $x11
8378
; RV64I-MO-NEXT: {{ $}}
84-
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
85-
; RV64I-MO-NEXT: PseudoRET
79+
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
8680
$x10 = ORI $x10, 1023
8781
CFI_INSTRUCTION offset $x1, -12
8882
$x11 = ORI $x11, 1023
@@ -96,11 +90,11 @@ body: |
9690
9791
9892
# OUTLINED-LABEL: name: OUTLINED_FUNCTION_0
99-
# OUTLINED: liveins: $x11, $x10, $x5
93+
# OUTLINED: liveins: $x11, $x10
10094
# OUTLINED-NEXT: {{ $}}
10195
# OUTLINED-NEXT: $x10 = ORI $x10, 1023
10296
# OUTLINED-NEXT: $x11 = ORI $x11, 1023
10397
# OUTLINED-NEXT: $x12 = ADDI $x10, 17
10498
# OUTLINED-NEXT: $x11 = AND $x12, $x11
10599
# OUTLINED-NEXT: $x10 = SUB $x10, $x11
106-
# OUTLINED-NEXT: $x0 = JALR $x5, 0
100+
# OUTLINED-NEXT: PseudoRET

llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,25 +94,28 @@ define i32 @_Z2f6v() minsize {
9494
; CHECK-BASELINE-NEXT: li a3, 0x4
9595
; CHECK-BASELINE-NEXT: li a4, 0x5
9696
; CHECK-BASELINE-NEXT: li a5, 0x6
97-
; CHECK-BASELINE-NEXT: jr t0
97+
; CHECK-BASELINE-NEXT: auipc t1, 0x0
98+
; CHECK-BASELINE-NEXT: jr t1
9899

99100
; CHECK-BASELINE: <OUTLINED_FUNCTION_1>:
100101
; CHECK-BASELINE-NEXT: li a0, 0x1
101102
; CHECK-BASELINE-NEXT: li a1, 0x2
102103
; CHECK-BASELINE-NEXT: li a2, 0x3
103104
; CHECK-BASELINE-NEXT: li a3, 0x4
104105
; CHECK-BASELINE-NEXT: li a4, 0x5
105-
; CHECK-BASELINE-NEXT: li a5, 0x7
106-
; CHECK-BASELINE-NEXT: jr t0
106+
; CHECK-BASELINE-NEXT: li a5, 0x8
107+
; CHECK-BASELINE-NEXT: auipc t1, 0x0
108+
; CHECK-BASELINE-NEXT: jr t1
107109

108110
; CHECK-BASELINE: <OUTLINED_FUNCTION_2>:
109111
; CHECK-BASELINE-NEXT: li a0, 0x1
110112
; CHECK-BASELINE-NEXT: li a1, 0x2
111113
; CHECK-BASELINE-NEXT: li a2, 0x3
112114
; CHECK-BASELINE-NEXT: li a3, 0x4
113115
; CHECK-BASELINE-NEXT: li a4, 0x5
114-
; CHECK-BASELINE-NEXT: li a5, 0x8
115-
; CHECK-BASELINE-NEXT: jr t0
116+
; CHECK-BASELINE-NEXT: li a5, 0x7
117+
; CHECK-BASELINE-NEXT: auipc t1, 0x0
118+
; CHECK-BASELINE-NEXT: jr t1
116119

117120
; CHECK-LEAF-DESCENDANTS: <OUTLINED_FUNCTION_0>:
118121
; CHECK-LEAF-DESCENDANTS-NEXT: li a0, 0x1

0 commit comments

Comments
 (0)