Skip to content

Commit 708a478

Browse files
authored
[RISCV] Add stack clash protection (#117612)
Enable `-fstack-clash-protection` for RISCV and add stack probing to function prologues. We probe the stack by creating a loop that allocates and probes the stack in ProbeSize chunks. We emit an unrolled probe loop for small allocations and a variable-length probe loop for bigger ones.
1 parent 5a0d73b commit 708a478

11 files changed

+1151
-21
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3777,7 +3777,8 @@ static void RenderSCPOptions(const ToolChain &TC, const ArgList &Args,
37773777
return;
37783778

37793779
if (!EffectiveTriple.isX86() && !EffectiveTriple.isSystemZ() &&
3780-
!EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64())
3780+
!EffectiveTriple.isPPC64() && !EffectiveTriple.isAArch64() &&
3781+
!EffectiveTriple.isRISCV())
37813782
return;
37823783

37833784
Args.addOptInFlag(CmdArgs, options::OPT_fstack_clash_protection,

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 184 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -580,25 +580,124 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
580580
Comment.str());
581581
}
582582

583+
// Allocate stack space and probe it if necessary.
583584
void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
584585
MachineBasicBlock::iterator MBBI,
585-
MachineFunction &MF, StackOffset Offset,
586-
uint64_t RealStackSize,
587-
bool EmitCFI) const {
586+
MachineFunction &MF, uint64_t Offset,
587+
uint64_t RealStackSize, bool EmitCFI,
588+
bool NeedProbe,
589+
uint64_t ProbeSize) const {
588590
DebugLoc DL;
589591
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
590592
const RISCVInstrInfo *TII = STI.getInstrInfo();
591593

592-
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
594+
// Simply allocate the stack if it's not big enough to require a probe.
595+
if (!NeedProbe || Offset <= ProbeSize) {
596+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Offset),
597+
MachineInstr::FrameSetup, getStackAlign());
598+
599+
if (EmitCFI) {
600+
// Emit ".cfi_def_cfa_offset RealStackSize"
601+
unsigned CFIIndex = MF.addFrameInst(
602+
MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
603+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
604+
.addCFIIndex(CFIIndex)
605+
.setMIFlag(MachineInstr::FrameSetup);
606+
}
607+
608+
return;
609+
}
610+
611+
// Unroll the probe loop depending on the number of iterations.
612+
if (Offset < ProbeSize * 5) {
613+
uint64_t CurrentOffset = 0;
614+
bool IsRV64 = STI.is64Bit();
615+
while (CurrentOffset + ProbeSize <= Offset) {
616+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
617+
StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
618+
getStackAlign());
619+
// s[d|w] zero, 0(sp)
620+
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
621+
.addReg(RISCV::X0)
622+
.addReg(SPReg)
623+
.addImm(0)
624+
.setMIFlags(MachineInstr::FrameSetup);
625+
626+
CurrentOffset += ProbeSize;
627+
if (EmitCFI) {
628+
// Emit ".cfi_def_cfa_offset CurrentOffset"
629+
unsigned CFIIndex = MF.addFrameInst(
630+
MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
631+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
632+
.addCFIIndex(CFIIndex)
633+
.setMIFlag(MachineInstr::FrameSetup);
634+
}
635+
}
636+
637+
uint64_t Residual = Offset - CurrentOffset;
638+
if (Residual) {
639+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
640+
StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
641+
getStackAlign());
642+
if (EmitCFI) {
643+
// Emit ".cfi_def_cfa_offset Offset"
644+
unsigned CFIIndex =
645+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
646+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
647+
.addCFIIndex(CFIIndex)
648+
.setMIFlag(MachineInstr::FrameSetup);
649+
}
650+
}
651+
652+
return;
653+
}
654+
655+
// Emit a variable-length allocation probing loop.
656+
uint64_t RoundedSize = alignDown(Offset, ProbeSize);
657+
uint64_t Residual = Offset - RoundedSize;
658+
659+
Register TargetReg = RISCV::X6;
660+
// SUB TargetReg, SP, RoundedSize
661+
RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
662+
StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
593663
getStackAlign());
594664

595665
if (EmitCFI) {
596-
// Emit ".cfi_def_cfa_offset RealStackSize"
597-
unsigned CFIIndex = MF.addFrameInst(
598-
MCCFIInstruction::cfiDefCfaOffset(nullptr, RealStackSize));
666+
// Set the CFA register to TargetReg.
667+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
668+
unsigned CFIIndex =
669+
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
599670
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
600671
.addCFIIndex(CFIIndex)
601-
.setMIFlag(MachineInstr::FrameSetup);
672+
.setMIFlags(MachineInstr::FrameSetup);
673+
}
674+
675+
// The PROBED_STACKALLOC pseudo will be expanded to a probe loop in `inlineStackProbe`.
676+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
677+
.addReg(SPReg)
678+
.addReg(TargetReg);
679+
680+
if (EmitCFI) {
681+
// Set the CFA register back to SP.
682+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
683+
unsigned CFIIndex =
684+
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
685+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
686+
.addCFIIndex(CFIIndex)
687+
.setMIFlags(MachineInstr::FrameSetup);
688+
}
689+
690+
if (Residual)
691+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
692+
MachineInstr::FrameSetup, getStackAlign());
693+
694+
if (EmitCFI) {
695+
// Emit ".cfi_def_cfa_offset Offset"
696+
unsigned CFIIndex =
697+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
698+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
699+
.addCFIIndex(CFIIndex)
700+
.setMIFlags(MachineInstr::FrameSetup);
602701
}
603702
}
604703

@@ -716,11 +815,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
716815
getPushOrLibCallsSavedInfo(MF, CSI));
717816
}
718817

719-
if (StackSize != 0) {
720-
// Allocate space on the stack if necessary.
721-
allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-StackSize),
722-
RealStackSize, /*EmitCFI=*/true);
723-
}
818+
// Allocate space on the stack if necessary.
819+
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
820+
const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
821+
bool NeedProbe = TLI->hasInlineStackProbe(MF);
822+
uint64_t ProbeSize = TLI->getStackProbeSize(MF, getStackAlign());
823+
if (StackSize != 0)
824+
allocateStack(MBB, MBBI, MF, StackSize, RealStackSize, /*EmitCFI=*/true,
825+
NeedProbe, ProbeSize);
724826

725827
// The frame pointer is callee-saved, and code has been generated for us to
726828
// save it to the stack. We need to skip over the storing of callee-saved
@@ -761,8 +863,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
761863
assert(SecondSPAdjustAmount > 0 &&
762864
"SecondSPAdjustAmount should be greater than zero");
763865

764-
allocateStack(MBB, MBBI, MF, StackOffset::getFixed(-SecondSPAdjustAmount),
765-
getStackSizeWithRVVPadding(MF), !hasFP(MF));
866+
allocateStack(MBB, MBBI, MF, SecondSPAdjustAmount,
867+
getStackSizeWithRVVPadding(MF), !hasFP(MF), NeedProbe,
868+
ProbeSize);
766869
}
767870

768871
if (RVVStackSize) {
@@ -1910,3 +2013,69 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
19102013
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
19112014
return TargetStackID::ScalableVector;
19122015
}
2016+
2017+
// Synthesize the probe loop.
2018+
static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
2019+
MachineBasicBlock::iterator MBBI,
2020+
DebugLoc DL) {
2021+
2022+
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
2023+
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
2024+
bool IsRV64 = Subtarget.is64Bit();
2025+
Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
2026+
const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
2027+
uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
2028+
2029+
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
2030+
MachineBasicBlock *LoopTestMBB =
2031+
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2032+
MF.insert(MBBInsertPoint, LoopTestMBB);
2033+
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2034+
MF.insert(MBBInsertPoint, ExitMBB);
2035+
MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2036+
Register TargetReg = RISCV::X6;
2037+
Register ScratchReg = RISCV::X7;
2038+
2039+
// ScratchReg = ProbeSize
2040+
TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2041+
2042+
// LoopTest:
2043+
// SUB SP, SP, ProbeSize
2044+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
2045+
.addReg(SPReg)
2046+
.addReg(ScratchReg)
2047+
.setMIFlags(Flags);
2048+
2049+
// s[d|w] zero, 0(sp)
2050+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
2051+
TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
2052+
.addReg(RISCV::X0)
2053+
.addReg(SPReg)
2054+
.addImm(0)
2055+
.setMIFlags(Flags);
2056+
2057+
// BNE SP, TargetReg, LoopTest
2058+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
2059+
.addReg(SPReg)
2060+
.addReg(TargetReg)
2061+
.addMBB(LoopTestMBB)
2062+
.setMIFlags(Flags);
2063+
2064+
ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
2065+
2066+
LoopTestMBB->addSuccessor(ExitMBB);
2067+
LoopTestMBB->addSuccessor(LoopTestMBB);
2068+
MBB.addSuccessor(LoopTestMBB);
2069+
}
2070+
2071+
void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
2072+
MachineBasicBlock &MBB) const {
2073+
auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
2074+
return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
2075+
});
2076+
if (Where != MBB.end()) {
2077+
DebugLoc DL = MBB.findDebugLoc(Where);
2078+
emitStackProbeInline(MF, MBB, Where, DL);
2079+
Where->eraseFromParent();
2080+
}
2081+
}

llvm/lib/Target/RISCV/RISCVFrameLowering.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
7979
}
8080

8181
void allocateStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
82-
MachineFunction &MF, StackOffset Offset,
83-
uint64_t RealStackSize, bool EmitCFI) const;
82+
MachineFunction &MF, uint64_t Offset,
83+
uint64_t RealStackSize, bool EmitCFI, bool NeedProbe,
84+
uint64_t ProbeSize) const;
8485

8586
protected:
8687
const RISCVSubtarget &STI;
@@ -103,6 +104,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
103104

104105
std::pair<int64_t, Align>
105106
assignRVVStackObjectOffsets(MachineFunction &MF) const;
107+
// Replace a StackProbe stub (if any) with the actual probe code inline
108+
void inlineStackProbe(MachineFunction &MF,
109+
MachineBasicBlock &PrologueMBB) const override;
106110
};
107111
} // namespace llvm
108112
#endif

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22350,3 +22350,25 @@ namespace llvm::RISCVVIntrinsicsTable {
2235022350
#include "RISCVGenSearchableTables.inc"
2235122351

2235222352
} // namespace llvm::RISCVVIntrinsicsTable
22353+
22354+
bool RISCVTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
22355+
22356+
// If the function specifically requests inline stack probes, emit them.
22357+
if (MF.getFunction().hasFnAttribute("probe-stack"))
22358+
return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
22359+
"inline-asm";
22360+
22361+
return false;
22362+
}
22363+
22364+
unsigned RISCVTargetLowering::getStackProbeSize(const MachineFunction &MF,
22365+
Align StackAlign) const {
22366+
// The default stack probe size is 4096 if the function has no
22367+
// stack-probe-size attribute.
22368+
const Function &Fn = MF.getFunction();
22369+
unsigned StackProbeSize =
22370+
Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
22371+
// Round down to the stack alignment.
22372+
StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
22373+
return StackProbeSize ? StackProbeSize : StackAlign.value();
22374+
}

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,6 +919,11 @@ class RISCVTargetLowering : public TargetLowering {
919919
MachineBasicBlock::instr_iterator &MBBI,
920920
const TargetInstrInfo *TII) const override;
921921

922+
/// True if stack clash protection is enabled for this function.
923+
bool hasInlineStackProbe(const MachineFunction &MF) const override;
924+
925+
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const;
926+
922927
private:
923928
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
924929
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
13731373
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
13741374
(ADDI GPR:$rs1, simm12:$imm12)>;
13751375

1376+
/// Stack probing
1377+
1378+
let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
1379+
// Probed stack allocation of a constant size, used in function prologues when
1380+
// stack-clash protection is enabled.
1381+
def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
1382+
(ins GPR:$scratch),
1383+
[]>,
1384+
Sched<[]>;
1385+
}
1386+
13761387
/// HI and ADD_LO address nodes.
13771388

13781389
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.

llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "RISCVMachineFunctionInfo.h"
14+
#include "llvm/IR/Module.h"
1415

1516
using namespace llvm;
1617

@@ -26,6 +27,35 @@ MachineFunctionInfo *RISCVMachineFunctionInfo::clone(
2627
return DestMF.cloneInfo<RISCVMachineFunctionInfo>(*this);
2728
}
2829

30+
RISCVMachineFunctionInfo::RISCVMachineFunctionInfo(const Function &F,
31+
const RISCVSubtarget *STI) {
32+
33+
// The default stack probe size is 4096 if the function has no
34+
// stack-probe-size attribute. This is a safe default because it is the
35+
// smallest possible guard page size.
36+
uint64_t ProbeSize = 4096;
37+
if (F.hasFnAttribute("stack-probe-size"))
38+
ProbeSize = F.getFnAttributeAsParsedInteger("stack-probe-size");
39+
else if (const auto *PS = mdconst::extract_or_null<ConstantInt>(
40+
F.getParent()->getModuleFlag("stack-probe-size")))
41+
ProbeSize = PS->getZExtValue();
42+
assert(int64_t(ProbeSize) > 0 && "Invalid stack probe size");
43+
44+
// Round down to the stack alignment.
45+
uint64_t StackAlign =
46+
STI->getFrameLowering()->getTransientStackAlign().value();
47+
ProbeSize = std::max(StackAlign, alignDown(ProbeSize, StackAlign));
48+
StringRef ProbeKind;
49+
if (F.hasFnAttribute("probe-stack"))
50+
ProbeKind = F.getFnAttribute("probe-stack").getValueAsString();
51+
else if (const auto *PS = dyn_cast_or_null<MDString>(
52+
F.getParent()->getModuleFlag("probe-stack")))
53+
ProbeKind = PS->getString();
54+
if (ProbeKind.size()) {
55+
StackProbeSize = ProbeSize;
56+
}
57+
}
58+
2959
void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) {
3060
MappingTraits<RISCVMachineFunctionInfo>::mapping(YamlIO, *this);
3161
}

llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
7676
unsigned RVPushRegs = 0;
7777
int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST;
7878

79+
int64_t StackProbeSize = 0;
80+
7981
public:
80-
RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
82+
RISCVMachineFunctionInfo(const Function &F, const RISCVSubtarget *STI);
8183

8284
MachineFunctionInfo *
8385
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,8 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
271271
MachineFunctionInfo *RISCVTargetMachine::createMachineFunctionInfo(
272272
BumpPtrAllocator &Allocator, const Function &F,
273273
const TargetSubtargetInfo *STI) const {
274-
return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(Allocator,
275-
F, STI);
274+
return RISCVMachineFunctionInfo::create<RISCVMachineFunctionInfo>(
275+
Allocator, F, static_cast<const RISCVSubtarget *>(STI));
276276
}
277277

278278
TargetTransformInfo

0 commit comments

Comments
 (0)