Skip to content

Commit e1f8f84

Browse files
authored
[ARM] Fix frame chains with M-profile PACBTI (#110285)
When using AAPCS-compliant frame chains with PACBTI return address signing, there ware a number of bugs in the generation of the frame pointer and function prologues. The most obvious was that we sometimes would modify r11 before pushing it to the stack, so it wasn't preserved as required by the PCS. We also sometimes did not push R11 and LR adjacent to one another on the stack, or used R11 as a frame pointer without pointing it at the saved value of R11, both of which are required to have an AAPCS compliant frame chain. The original work of this patch was done by James Westwood, reviewed as #82801 and #81249, with some tidy-ups done by Mark Murray and myself.
1 parent cb43021 commit e1f8f84

File tree

6 files changed

+281
-57
lines changed

6 files changed

+281
-57
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
116116
return CSR_iOS_SaveList;
117117

118118
if (PushPopSplit == ARMSubtarget::SplitR7)
119-
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
119+
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
120120
: CSR_ATPCS_SplitPush_SaveList;
121121

122+
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
123+
return CSR_AAPCS_SplitPush_R11_SaveList;
124+
122125
return CSR_AAPCS_SaveList;
123126
}
124127

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
301301
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
302302
R10)>;
303303

304-
// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
305-
// pointer even for Thumb targets, where split pushes are necessary.
306-
// This AAPCS alternative makes sure the frame index slots match the push
307-
// order in that case.
308-
def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
309-
R7, R6, R5, R4,
310-
R10, R9, R8,
311-
(sequence "D%u", 15, 8))>;
304+
// Sometimes we need to split the push of the callee-saved GPRs into two
305+
// regions, to ensure that the frame chain record is set up correctly. These
306+
// list the callee-saved registers in the order they end up on the stack, which
307+
// depends on whether the frame pointer is r7 or r11.
308+
def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
309+
LR, R11,
310+
(sequence "D%u", 15, 8))>;
311+
def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
312+
R7, R6, R5, R4,
313+
R10, R9, R8,
314+
(sequence "D%u", 15, 8))>;
312315

313316
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
314317
// and the pointer return value are both passed in R0 in these cases, this can

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
199199
// push {r0-r10, r12} GPRCS1
200200
// vpush {r8-d15} DPRCS1
201201
// push {r11, lr} GPRCS2
202+
//
203+
// SplitR11AAPCSSignRA:
204+
// push {r0-r10, r12} GPRSC1
205+
// push {r11, lr} GPRCS2
206+
// vpush {r8-d15} DPRCS1
202207

203208
// If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
204209
// the top of the stack frame.
@@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg,
246251
return SpillArea::GPRCS1;
247252

248253
case ARM::LR:
249-
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
254+
if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
255+
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
250256
return SpillArea::GPRCS2;
251257
else
252258
return SpillArea::GPRCS1;
@@ -859,6 +865,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
859865
// This is a conservative estimation: Assume the frame pointer being r7 and
860866
// pc("r15") up to r8 getting spilled before (= 8 registers).
861867
int MaxRegBytes = 8 * 4;
868+
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
869+
// Here, r11 can be stored below all of r4-r15.
870+
MaxRegBytes = 11 * 4;
862871
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
863872
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
864873
MaxRegBytes = 11 * 4 + 8 * 8;
@@ -931,17 +940,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
931940
}
932941

933942
// Determine spill area sizes, and some important frame indices.
943+
SpillArea FramePtrSpillArea;
944+
bool BeforeFPPush = true;
934945
for (const CalleeSavedInfo &I : CSI) {
935946
Register Reg = I.getReg();
936947
int FI = I.getFrameIdx();
937948

938-
if (Reg == FramePtr)
949+
SpillArea Area = getSpillArea(Reg, PushPopSplit,
950+
AFI->getNumAlignedDPRCS2Regs(), RegInfo);
951+
952+
if (Reg == FramePtr) {
939953
FramePtrSpillFI = FI;
954+
FramePtrSpillArea = Area;
955+
}
940956
if (Reg == ARM::D8)
941957
D8SpillFI = FI;
942958

943-
switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
944-
RegInfo)) {
959+
switch (Area) {
945960
case SpillArea::FPCXT:
946961
FPCXTSaveSize += 4;
947962
break;
@@ -968,21 +983,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
968983
// Move past FPCXT area.
969984
if (FPCXTSaveSize > 0) {
970985
LastPush = MBBI++;
971-
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
986+
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
972987
}
973988

974989
// Allocate the vararg register save area.
975990
if (ArgRegsSaveSize) {
976991
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
977992
MachineInstr::FrameSetup);
978993
LastPush = std::prev(MBBI);
979-
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
994+
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
980995
}
981996

982997
// Move past area 1.
983998
if (GPRCS1Size > 0) {
984999
GPRCS1Push = LastPush = MBBI++;
985-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
1000+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1001+
if (FramePtrSpillArea == SpillArea::GPRCS1)
1002+
BeforeFPPush = false;
9861003
}
9871004

9881005
// Determine starting offsets of spill areas. These offsets are all positive
@@ -1006,21 +1023,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10061023
} else {
10071024
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
10081025
}
1009-
int FramePtrOffsetInPush = 0;
10101026
if (HasFP) {
10111027
// Offset from the CFA to the saved frame pointer, will be negative.
10121028
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
10131029
LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
10141030
<< ", FPOffset: " << FPOffset << "\n");
10151031
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
10161032
"Max FP estimation is wrong");
1017-
// Offset from the top of the GPRCS1 area to the saved frame pointer, will
1018-
// be negative.
1019-
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
1020-
LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
1021-
<< ", FramePtrSpillOffset="
1022-
<< (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
1023-
<< "\n");
10241033
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
10251034
NumBytes);
10261035
}
@@ -1032,7 +1041,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10321041
// after DPRCS1.
10331042
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
10341043
GPRCS2Push = LastPush = MBBI++;
1035-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
1044+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1045+
if (FramePtrSpillArea == SpillArea::GPRCS2)
1046+
BeforeFPPush = false;
10361047
}
10371048

10381049
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1045,7 +1056,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10451056
else {
10461057
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
10471058
MachineInstr::FrameSetup);
1048-
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
1059+
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
10491060
}
10501061
}
10511062

@@ -1054,7 +1065,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10541065
// Since vpush register list cannot have gaps, there may be multiple vpush
10551066
// instructions in the prologue.
10561067
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1057-
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
1068+
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1069+
BeforeFPPush);
10581070
LastPush = MBBI++;
10591071
}
10601072
}
@@ -1073,7 +1085,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10731085
// Move GPRCS2, if using using SplitR11WindowsSEH.
10741086
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
10751087
GPRCS2Push = LastPush = MBBI++;
1076-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
1088+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1089+
if (FramePtrSpillArea == SpillArea::GPRCS2)
1090+
BeforeFPPush = false;
10771091
}
10781092

10791093
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1174,28 +1188,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
11741188
// into spill area 1, including the FP in R11. In either case, it
11751189
// is in area one and the adjustment needs to take place just after
11761190
// that push.
1177-
// FIXME: The above is not necessary true when PACBTI is enabled.
1178-
// AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1179-
// so FP ends up on area two.
11801191
MachineBasicBlock::iterator AfterPush;
11811192
if (HasFP) {
1182-
AfterPush = std::next(GPRCS1Push);
1183-
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1184-
int FPOffset = PushSize + FramePtrOffsetInPush;
1185-
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1186-
AfterPush = std::next(GPRCS2Push);
1187-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1188-
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1189-
} else {
1190-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1191-
FramePtr, ARM::SP, FPOffset,
1192-
MachineInstr::FrameSetup);
1193+
MachineBasicBlock::iterator FPPushInst;
1194+
// Offset from SP immediately after the push which saved the FP to the FP
1195+
// save slot.
1196+
int64_t FPOffsetAfterPush;
1197+
switch (FramePtrSpillArea) {
1198+
case SpillArea::GPRCS1:
1199+
FPPushInst = GPRCS1Push;
1200+
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1201+
ArgRegsSaveSize + FPCXTSaveSize +
1202+
sizeOfSPAdjustment(*FPPushInst);
1203+
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1204+
<< FPOffsetAfterPush << " after that push\n");
1205+
break;
1206+
case SpillArea::GPRCS2:
1207+
FPPushInst = GPRCS2Push;
1208+
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1209+
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1210+
sizeOfSPAdjustment(*FPPushInst);
1211+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1212+
FPOffsetAfterPush += DPRCSSize + DPRGapSize;
1213+
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1214+
<< FPOffsetAfterPush << " after that push\n");
1215+
break;
1216+
default:
1217+
llvm_unreachable("frame pointer in unknown spill area");
1218+
break;
11931219
}
1220+
AfterPush = std::next(FPPushInst);
1221+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1222+
assert(FPOffsetAfterPush == 0);
1223+
1224+
// Emit the MOV or ADD to set up the frame pointer register.
1225+
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1226+
FramePtr, ARM::SP, FPOffsetAfterPush,
1227+
MachineInstr::FrameSetup);
1228+
11941229
if (!NeedsWinCFI) {
1195-
if (FramePtrOffsetInPush + PushSize != 0) {
1230+
// Emit DWARF info to find the CFA using the frame pointer from this
1231+
// point onward.
1232+
if (FPOffsetAfterPush != 0) {
11961233
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
11971234
nullptr, MRI->getDwarfRegNum(FramePtr, true),
1198-
FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1235+
-MFI.getObjectOffset(FramePtrSpillFI)));
11991236
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
12001237
.addCFIIndex(CFIIndex)
12011238
.setMIFlags(MachineInstr::FrameSetup);
@@ -1708,7 +1745,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
17081745
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
17091746
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
17101747
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1711-
PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1748+
(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1749+
PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
17121750
Reg = ARM::PC;
17131751
// Fold the return instruction into the LDM.
17141752
DeleteRet = true;
@@ -2940,18 +2978,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
29402978
const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
29412979
if (AFI.shouldSignReturnAddress()) {
29422980
// The order of register must match the order we push them, because the
2943-
// PEI assigns frame indices in that order. When compiling for return
2944-
// address sign and authenication, we use split push, therefore the orders
2945-
// we want are:
2946-
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2947-
CSI.insert(find_if(CSI,
2948-
[=](const auto &CS) {
2949-
Register Reg = CS.getReg();
2950-
return Reg == ARM::R10 || Reg == ARM::R11 ||
2951-
Reg == ARM::R8 || Reg == ARM::R9 ||
2952-
ARM::DPRRegClass.contains(Reg);
2953-
}),
2954-
CalleeSavedInfo(ARM::R12));
2981+
// PEI assigns frame indices in that order. That order depends on the
2982+
// PushPopSplitVariation, there are only two cases which we use with return
2983+
// address signing:
2984+
switch (STI.getPushPopSplitVariation(MF)) {
2985+
case ARMSubtarget::SplitR7:
2986+
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2987+
CSI.insert(find_if(CSI,
2988+
[=](const auto &CS) {
2989+
Register Reg = CS.getReg();
2990+
return Reg == ARM::R10 || Reg == ARM::R11 ||
2991+
Reg == ARM::R8 || Reg == ARM::R9 ||
2992+
ARM::DPRRegClass.contains(Reg);
2993+
}),
2994+
CalleeSavedInfo(ARM::R12));
2995+
break;
2996+
case ARMSubtarget::SplitR11AAPCSSignRA:
2997+
// With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
2998+
// on the stack.
2999+
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3000+
break;
3001+
default:
3002+
llvm_unreachable("Unexpected CSR split with return address signing");
3003+
}
29553004
}
29563005

29573006
return false;

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
514514
F.needsUnwindTableEntry() &&
515515
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
516516
return SplitR11WindowsSEH;
517+
518+
// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
519+
// other in the list of callee saved registers in a frame, and branch
520+
// signing is enabled.
521+
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
522+
getFramePointerReg() == ARM::R11)
523+
return SplitR11AAPCSSignRA;
517524
return NoSplit;
518525
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
105105
/// vpush {d8-d15}
106106
/// push {r11, lr}
107107
SplitR11WindowsSEH,
108+
109+
/// When generating AAPCS-compilant frame chains, R11 is the frame pointer,
110+
/// and must be pushed adjacent to the return address (LR). Normally this
111+
/// isn't a problem, because the only register between them is r12, which is
112+
/// the intra-procedure-call scratch register, so doesn't need to be saved.
113+
/// However, when PACBTI is in use, r12 contains the authentication code, so
114+
/// does need to be saved. This means that we need a separate push for R11
115+
/// and LR.
116+
/// push {r0-r10, r12}
117+
/// push {r11, lr}
118+
/// vpush {d8-d15}
119+
SplitR11AAPCSSignRA,
108120
};
109121

110122
protected:

0 commit comments

Comments
 (0)