Skip to content

Commit 493529f

Browse files
committed
Re-land: [ARM] Fix frame chains with M-profile PACBTI (#110285)
When using AAPCS-compliant frame chains with PACBTI return address signing, there were a number of bugs in the generation of the frame pointer and function prologues. The most obvious was that we sometimes would modify r11 before pushing it to the stack, so it wasn't preserved as required by the PCS. We also sometimes did not push R11 and LR adjacent to one another on the stack, or used R11 as a frame pointer without pointing it at the saved value of R11, both of which are required to have an AAPCS-compliant frame chain. The original work of this patch was done by James Westwood, reviewed as #82801 and #81249, with some tidy-ups done by Mark Murray and myself.
1 parent 3a2c957 commit 493529f

File tree

6 files changed

+281
-57
lines changed

6 files changed

+281
-57
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
116116
return CSR_iOS_SaveList;
117117

118118
if (PushPopSplit == ARMSubtarget::SplitR7)
119-
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
119+
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
120120
: CSR_ATPCS_SplitPush_SaveList;
121121

122+
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
123+
return CSR_AAPCS_SplitPush_R11_SaveList;
124+
122125
return CSR_AAPCS_SaveList;
123126
}
124127

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
301301
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
302302
R10)>;
303303

304-
// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
305-
// pointer even for Thumb targets, where split pushes are necessary.
306-
// This AAPCS alternative makes sure the frame index slots match the push
307-
// order in that case.
308-
def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
309-
R7, R6, R5, R4,
310-
R10, R9, R8,
311-
(sequence "D%u", 15, 8))>;
304+
// Sometimes we need to split the push of the callee-saved GPRs into two
305+
// regions, to ensure that the frame chain record is set up correctly. These
306+
// list the callee-saved registers in the order they end up on the stack, which
307+
// depends on whether the frame pointer is r7 or r11.
308+
def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
309+
LR, R11,
310+
(sequence "D%u", 15, 8))>;
311+
def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
312+
R7, R6, R5, R4,
313+
R10, R9, R8,
314+
(sequence "D%u", 15, 8))>;
312315

313316
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
314317
// and the pointer return value are both passed in R0 in these cases, this can

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
199199
// push {r0-r10, r12} GPRCS1
200200
// vpush {d8-d15} DPRCS1
201201
// push {r11, lr} GPRCS2
202+
//
203+
// SplitR11AAPCSSignRA:
204+
// push {r0-r10, r12} GPRCS1
205+
// push {r11, lr} GPRCS2
206+
// vpush {d8-d15} DPRCS1
202207

203208
// If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
204209
// the top of the stack frame.
@@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg,
246251
return SpillArea::GPRCS1;
247252

248253
case ARM::LR:
249-
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
254+
if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
255+
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
250256
return SpillArea::GPRCS2;
251257
else
252258
return SpillArea::GPRCS1;
@@ -863,6 +869,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
863869
// This is a conservative estimation: Assume the frame pointer being r7 and
864870
// pc("r15") up to r8 getting spilled before (= 8 registers).
865871
int MaxRegBytes = 8 * 4;
872+
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
873+
// Here, r11 can be stored below all of r4-r15.
874+
MaxRegBytes = 11 * 4;
866875
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
867876
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
868877
MaxRegBytes = 11 * 4 + 8 * 8;
@@ -935,17 +944,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
935944
}
936945

937946
// Determine spill area sizes, and some important frame indices.
947+
SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
948+
bool BeforeFPPush = true;
938949
for (const CalleeSavedInfo &I : CSI) {
939950
Register Reg = I.getReg();
940951
int FI = I.getFrameIdx();
941952

942-
if (Reg == FramePtr)
953+
SpillArea Area = getSpillArea(Reg, PushPopSplit,
954+
AFI->getNumAlignedDPRCS2Regs(), RegInfo);
955+
956+
if (Reg == FramePtr) {
943957
FramePtrSpillFI = FI;
958+
FramePtrSpillArea = Area;
959+
}
944960
if (Reg == ARM::D8)
945961
D8SpillFI = FI;
946962

947-
switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
948-
RegInfo)) {
963+
switch (Area) {
949964
case SpillArea::FPCXT:
950965
FPCXTSaveSize += 4;
951966
break;
@@ -972,21 +987,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
972987
// Move past FPCXT area.
973988
if (FPCXTSaveSize > 0) {
974989
LastPush = MBBI++;
975-
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
990+
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
976991
}
977992

978993
// Allocate the vararg register save area.
979994
if (ArgRegsSaveSize) {
980995
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
981996
MachineInstr::FrameSetup);
982997
LastPush = std::prev(MBBI);
983-
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
998+
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
984999
}
9851000

9861001
// Move past area 1.
9871002
if (GPRCS1Size > 0) {
9881003
GPRCS1Push = LastPush = MBBI++;
989-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
1004+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
1005+
if (FramePtrSpillArea == SpillArea::GPRCS1)
1006+
BeforeFPPush = false;
9901007
}
9911008

9921009
// Determine starting offsets of spill areas. These offsets are all positive
@@ -1010,21 +1027,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10101027
} else {
10111028
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
10121029
}
1013-
int FramePtrOffsetInPush = 0;
10141030
if (HasFP) {
10151031
// Offset from the CFA to the saved frame pointer, will be negative.
10161032
[[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
10171033
LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
10181034
<< ", FPOffset: " << FPOffset << "\n");
10191035
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
10201036
"Max FP estimation is wrong");
1021-
// Offset from the top of the GPRCS1 area to the saved frame pointer, will
1022-
// be negative.
1023-
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
1024-
LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
1025-
<< ", FramePtrSpillOffset="
1026-
<< (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
1027-
<< "\n");
10281037
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
10291038
NumBytes);
10301039
}
@@ -1036,7 +1045,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10361045
// after DPRCS1.
10371046
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
10381047
GPRCS2Push = LastPush = MBBI++;
1039-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
1048+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1049+
if (FramePtrSpillArea == SpillArea::GPRCS2)
1050+
BeforeFPPush = false;
10401051
}
10411052

10421053
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1049,7 +1060,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10491060
else {
10501061
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
10511062
MachineInstr::FrameSetup);
1052-
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
1063+
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
10531064
}
10541065
}
10551066

@@ -1058,7 +1069,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10581069
// Since vpush register list cannot have gaps, there may be multiple vpush
10591070
// instructions in the prologue.
10601071
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1061-
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
1072+
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
1073+
BeforeFPPush);
10621074
LastPush = MBBI++;
10631075
}
10641076
}
@@ -1077,7 +1089,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10771089
// Move GPRCS2, if using SplitR11WindowsSEH.
10781090
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
10791091
GPRCS2Push = LastPush = MBBI++;
1080-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
1092+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1093+
if (FramePtrSpillArea == SpillArea::GPRCS2)
1094+
BeforeFPPush = false;
10811095
}
10821096

10831097
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1178,28 +1192,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
11781192
// into spill area 1, including the FP in R11. In either case, it
11791193
// is in area one and the adjustment needs to take place just after
11801194
// that push.
1181-
// FIXME: The above is not necessary true when PACBTI is enabled.
1182-
// AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1183-
// so FP ends up on area two.
11841195
MachineBasicBlock::iterator AfterPush;
11851196
if (HasFP) {
1186-
AfterPush = std::next(GPRCS1Push);
1187-
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1188-
int FPOffset = PushSize + FramePtrOffsetInPush;
1189-
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1190-
AfterPush = std::next(GPRCS2Push);
1191-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1192-
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1193-
} else {
1194-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1195-
FramePtr, ARM::SP, FPOffset,
1196-
MachineInstr::FrameSetup);
1197+
MachineBasicBlock::iterator FPPushInst;
1198+
// Offset from SP immediately after the push which saved the FP to the FP
1199+
// save slot.
1200+
int64_t FPOffsetAfterPush;
1201+
switch (FramePtrSpillArea) {
1202+
case SpillArea::GPRCS1:
1203+
FPPushInst = GPRCS1Push;
1204+
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1205+
ArgRegsSaveSize + FPCXTSaveSize +
1206+
sizeOfSPAdjustment(*FPPushInst);
1207+
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1208+
<< FPOffsetAfterPush << " after that push\n");
1209+
break;
1210+
case SpillArea::GPRCS2:
1211+
FPPushInst = GPRCS2Push;
1212+
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1213+
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1214+
sizeOfSPAdjustment(*FPPushInst);
1215+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1216+
FPOffsetAfterPush += DPRCSSize + DPRGapSize;
1217+
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1218+
<< FPOffsetAfterPush << " after that push\n");
1219+
break;
1220+
default:
1221+
llvm_unreachable("frame pointer in unknown spill area");
1222+
break;
11971223
}
1224+
AfterPush = std::next(FPPushInst);
1225+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1226+
assert(FPOffsetAfterPush == 0);
1227+
1228+
// Emit the MOV or ADD to set up the frame pointer register.
1229+
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1230+
FramePtr, ARM::SP, FPOffsetAfterPush,
1231+
MachineInstr::FrameSetup);
1232+
11981233
if (!NeedsWinCFI) {
1199-
if (FramePtrOffsetInPush + PushSize != 0) {
1234+
// Emit DWARF info to find the CFA using the frame pointer from this
1235+
// point onward.
1236+
if (FPOffsetAfterPush != 0) {
12001237
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
12011238
nullptr, MRI->getDwarfRegNum(FramePtr, true),
1202-
FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1239+
-MFI.getObjectOffset(FramePtrSpillFI)));
12031240
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
12041241
.addCFIIndex(CFIIndex)
12051242
.setMIFlags(MachineInstr::FrameSetup);
@@ -1712,7 +1749,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
17121749
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
17131750
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
17141751
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1715-
PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1752+
(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1753+
PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
17161754
Reg = ARM::PC;
17171755
// Fold the return instruction into the LDM.
17181756
DeleteRet = true;
@@ -2945,18 +2983,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
29452983
const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
29462984
if (AFI.shouldSignReturnAddress()) {
29472985
// The order of register must match the order we push them, because the
2948-
// PEI assigns frame indices in that order. When compiling for return
2949-
// address sign and authenication, we use split push, therefore the orders
2950-
// we want are:
2951-
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2952-
CSI.insert(find_if(CSI,
2953-
[=](const auto &CS) {
2954-
Register Reg = CS.getReg();
2955-
return Reg == ARM::R10 || Reg == ARM::R11 ||
2956-
Reg == ARM::R8 || Reg == ARM::R9 ||
2957-
ARM::DPRRegClass.contains(Reg);
2958-
}),
2959-
CalleeSavedInfo(ARM::R12));
2986+
// PEI assigns frame indices in that order. That order depends on the
2987+
// PushPopSplitVariation, there are only two cases which we use with return
2988+
// address signing:
2989+
switch (STI.getPushPopSplitVariation(MF)) {
2990+
case ARMSubtarget::SplitR7:
2991+
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2992+
CSI.insert(find_if(CSI,
2993+
[=](const auto &CS) {
2994+
Register Reg = CS.getReg();
2995+
return Reg == ARM::R10 || Reg == ARM::R11 ||
2996+
Reg == ARM::R8 || Reg == ARM::R9 ||
2997+
ARM::DPRRegClass.contains(Reg);
2998+
}),
2999+
CalleeSavedInfo(ARM::R12));
3000+
break;
3001+
case ARMSubtarget::SplitR11AAPCSSignRA:
3002+
// With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
3003+
// on the stack.
3004+
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
3005+
break;
3006+
default:
3007+
llvm_unreachable("Unexpected CSR split with return address signing");
3008+
}
29603009
}
29613010

29623011
return false;

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
514514
F.needsUnwindTableEntry() &&
515515
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
516516
return SplitR11WindowsSEH;
517+
518+
// Returns SplitR11AAPCSSignRA if R11 and lr are not adjacent to each
519+
// other in the list of callee saved registers in a frame, and branch
520+
// signing is enabled.
521+
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
522+
getFramePointerReg() == ARM::R11)
523+
return SplitR11AAPCSSignRA;
517524
return NoSplit;
518525
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
105105
/// vpush {d8-d15}
106106
/// push {r11, lr}
107107
SplitR11WindowsSEH,
108+
109+
/// When generating AAPCS-compliant frame chains, R11 is the frame pointer,
110+
/// and must be pushed adjacent to the return address (LR). Normally this
111+
/// isn't a problem, because the only register between them is r12, which is
112+
/// the intra-procedure-call scratch register, so doesn't need to be saved.
113+
/// However, when PACBTI is in use, r12 contains the authentication code, so
114+
/// does need to be saved. This means that we need a separate push for R11
115+
/// and LR.
116+
/// push {r0-r10, r12}
117+
/// push {r11, lr}
118+
/// vpush {d8-d15}
119+
SplitR11AAPCSSignRA,
108120
};
109121

110122
protected:

0 commit comments

Comments
 (0)