Skip to content

Commit 529235f

Browse files
committed
[ARM] Fix bugs with AAPCS frame chains and PACBTI
When using AAPCS-compliant frame chains with PACBTI return address signing, there ware a number of bugs in the generation of the frame pointer and function prologues. The most obvious was that we sometimes would modify r11 before pushing it to the stack, so it wasn't preserved as required by the PCS. We also sometimes did not push R11 and LR adjacent to one another on the stack, or used R11 as a frame pointer without pointing it at the saved value of R11, both of which are required to have an AAPCS compliant frame chain. The original work of this patch was done by James Westwood, reviewed as #82801 and #81249, with some tidy-ups done by Mark Murray and myself.
1 parent c4812b5 commit 529235f

File tree

6 files changed

+173
-92
lines changed

6 files changed

+173
-92
lines changed

llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
116116
return CSR_iOS_SaveList;
117117

118118
if (PushPopSplit == ARMSubtarget::SplitR7)
119-
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
119+
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
120120
: CSR_ATPCS_SplitPush_SaveList;
121121

122+
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
123+
return CSR_AAPCS_SplitPush_R11_SaveList;
124+
122125
return CSR_AAPCS_SaveList;
123126
}
124127

llvm/lib/Target/ARM/ARMCallingConv.td

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
301301
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
302302
R10)>;
303303

304-
// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
305-
// pointer even for Thumb targets, where split pushes are necessary.
306-
// This AAPCS alternative makes sure the frame index slots match the push
307-
// order in that case.
308-
def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
309-
R7, R6, R5, R4,
310-
R10, R9, R8,
311-
(sequence "D%u", 15, 8))>;
304+
// Sometimes we need to split the push of the callee-saved GPRs into two
305+
// regions, to ensure that the frame chain record is set up correctly. These
306+
// list the callee-saved registers in the order they end up on the stack, which
307+
// depends on whether the frame pointer is r7 or r11.
308+
def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
309+
LR, R11,
310+
(sequence "D%u", 15, 8))>;
311+
def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
312+
R7, R6, R5, R4,
313+
R10, R9, R8,
314+
(sequence "D%u", 15, 8))>;
312315

313316
// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
314317
// and the pointer return value are both passed in R0 in these cases, this can

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 96 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
199199
// push {r0-r10, r12} GPRCS1
200200
// vpush {r8-d15} DPRCS1
201201
// push {r11, lr} GPRCS2
202+
//
203+
// SplitR11AAPCSSignRA:
204+
// push {r0-r10, r12} GPRSC1
205+
// push {r11, lr} GPRCS2
206+
// vpush {r8-d15} DPRCS1
202207

203208
// If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
204209
// the top of the stack frame.
@@ -238,7 +243,8 @@ SpillArea getSpillArea(Register Reg,
238243
return SpillArea::GPRCS1;
239244

240245
case ARM::LR:
241-
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
246+
if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
247+
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
242248
return SpillArea::GPRCS2;
243249
else
244250
return SpillArea::GPRCS1;
@@ -827,6 +833,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
827833
// This is a conservative estimation: Assume the frame pointer being r7 and
828834
// pc("r15") up to r8 getting spilled before (= 8 registers).
829835
int MaxRegBytes = 8 * 4;
836+
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
837+
// Here, r11 can be stored below all of r4-r15.
838+
MaxRegBytes = 11 * 4;
830839
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
831840
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
832841
MaxRegBytes = 11 * 4 + 8 * 8;
@@ -899,17 +908,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
899908
}
900909

901910
// Determine spill area sizes, and some important frame indices.
911+
SpillArea FramePtrSpillArea;
912+
bool BeforeFPPush = true;
902913
for (const CalleeSavedInfo &I : CSI) {
903914
Register Reg = I.getReg();
904915
int FI = I.getFrameIdx();
905916

906-
if (Reg == FramePtr)
917+
SpillArea Area = getSpillArea(Reg, PushPopSplit,
918+
AFI->getNumAlignedDPRCS2Regs(), RegInfo);
919+
920+
if (Reg == FramePtr) {
907921
FramePtrSpillFI = FI;
922+
FramePtrSpillArea = Area;
923+
}
908924
if (Reg == ARM::D8)
909925
D8SpillFI = FI;
910926

911-
switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
912-
RegInfo)) {
927+
switch (Area) {
913928
case SpillArea::FPCXT:
914929
FPCXTSaveSize += 4;
915930
break;
@@ -936,21 +951,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
936951
// Move past FPCXT area.
937952
if (FPCXTSaveSize > 0) {
938953
LastPush = MBBI++;
939-
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
954+
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
940955
}
941956

942957
// Allocate the vararg register save area.
943958
if (ArgRegsSaveSize) {
944959
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
945960
MachineInstr::FrameSetup);
946961
LastPush = std::prev(MBBI);
947-
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
962+
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
948963
}
949964

950965
// Move past area 1.
951966
if (GPRCS1Size > 0) {
952967
GPRCS1Push = LastPush = MBBI++;
953-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
968+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
969+
if (FramePtrSpillArea == SpillArea::GPRCS1)
970+
BeforeFPPush = false;
954971
}
955972

956973
// Determine starting offsets of spill areas. These offsets are all positive
@@ -974,21 +991,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
974991
} else {
975992
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
976993
}
977-
int FramePtrOffsetInPush = 0;
978994
if (HasFP) {
979995
// Offset from the CFA to the saved frame pointer, will be negative.
980996
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
981997
LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
982998
<< ", FPOffset: " << FPOffset << "\n");
983999
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
9841000
"Max FP estimation is wrong");
985-
// Offset from the top of the GPRCS1 area to the saved frame pointer, will
986-
// be negative.
987-
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
988-
LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
989-
<< ", FramePtrSpillOffset="
990-
<< (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
991-
<< "\n");
9921001
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
9931002
NumBytes);
9941003
}
@@ -1000,7 +1009,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10001009
// after DPRCS1.
10011010
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
10021011
GPRCS2Push = LastPush = MBBI++;
1003-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
1012+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1013+
if (FramePtrSpillArea == SpillArea::GPRCS2)
1014+
BeforeFPPush = false;
10041015
}
10051016

10061017
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1013,7 +1024,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10131024
else {
10141025
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
10151026
MachineInstr::FrameSetup);
1016-
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
1027+
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
10171028
}
10181029
}
10191030

@@ -1022,7 +1033,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10221033
// Since vpush register list cannot have gaps, there may be multiple vpush
10231034
// instructions in the prologue.
10241035
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
1025-
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
1036+
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI), BeforeFPPush);
10261037
LastPush = MBBI++;
10271038
}
10281039
}
@@ -1041,7 +1052,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
10411052
// Move GPRCS2, if using using SplitR11WindowsSEH.
10421053
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
10431054
GPRCS2Push = LastPush = MBBI++;
1044-
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
1055+
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
1056+
if (FramePtrSpillArea == SpillArea::GPRCS2)
1057+
BeforeFPPush = false;
10451058
}
10461059

10471060
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1142,28 +1155,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
11421155
// into spill area 1, including the FP in R11. In either case, it
11431156
// is in area one and the adjustment needs to take place just after
11441157
// that push.
1145-
// FIXME: The above is not necessary true when PACBTI is enabled.
1146-
// AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1147-
// so FP ends up on area two.
11481158
MachineBasicBlock::iterator AfterPush;
11491159
if (HasFP) {
1150-
AfterPush = std::next(GPRCS1Push);
1151-
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1152-
int FPOffset = PushSize + FramePtrOffsetInPush;
1153-
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1154-
AfterPush = std::next(GPRCS2Push);
1155-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1156-
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1157-
} else {
1158-
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1159-
FramePtr, ARM::SP, FPOffset,
1160-
MachineInstr::FrameSetup);
1160+
MachineBasicBlock::iterator FPPushInst;
1161+
// Offset from SP immediately after the push which saved the FP to the FP
1162+
// save slot.
1163+
int64_t FPOffsetAfterPush;
1164+
switch (FramePtrSpillArea) {
1165+
case SpillArea::GPRCS1:
1166+
FPPushInst = GPRCS1Push;
1167+
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1168+
ArgRegsSaveSize + FPCXTSaveSize +
1169+
sizeOfSPAdjustment(*FPPushInst);
1170+
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1171+
<< FPOffsetAfterPush << " after that push\n");
1172+
break;
1173+
case SpillArea::GPRCS2:
1174+
FPPushInst = GPRCS2Push;
1175+
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
1176+
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1177+
sizeOfSPAdjustment(*FPPushInst);
1178+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1179+
FPOffsetAfterPush += DPRCSSize + DPRGapSize;
1180+
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1181+
<< FPOffsetAfterPush << " after that push\n");
1182+
break;
1183+
default:
1184+
llvm_unreachable("frame pointer in unknown spill area");
1185+
break;
11611186
}
1187+
AfterPush = std::next(FPPushInst);
1188+
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1189+
assert(FPOffsetAfterPush == 0);
1190+
1191+
// Emit the MOV or ADD to set up the frame pointer register.
1192+
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1193+
FramePtr, ARM::SP, FPOffsetAfterPush,
1194+
MachineInstr::FrameSetup);
1195+
11621196
if (!NeedsWinCFI) {
1163-
if (FramePtrOffsetInPush + PushSize != 0) {
1197+
// Emit DWARF info to find the CFA using the frame pointer from this
1198+
// point onward.
1199+
if (FPOffsetAfterPush != 0) {
11641200
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
11651201
nullptr, MRI->getDwarfRegNum(FramePtr, true),
1166-
FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1202+
-MFI.getObjectOffset(FramePtrSpillFI)));
11671203
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
11681204
.addCFIIndex(CFIIndex)
11691205
.setMIFlags(MachineInstr::FrameSetup);
@@ -1675,7 +1711,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
16751711
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
16761712
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
16771713
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1678-
PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1714+
(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1715+
PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
16791716
Reg = ARM::PC;
16801717
// Fold the return instruction into the LDM.
16811718
DeleteRet = true;
@@ -2907,18 +2944,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
29072944
const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
29082945
if (AFI.shouldSignReturnAddress()) {
29092946
// The order of register must match the order we push them, because the
2910-
// PEI assigns frame indices in that order. When compiling for return
2911-
// address sign and authenication, we use split push, therefore the orders
2912-
// we want are:
2913-
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2914-
CSI.insert(find_if(CSI,
2915-
[=](const auto &CS) {
2916-
Register Reg = CS.getReg();
2917-
return Reg == ARM::R10 || Reg == ARM::R11 ||
2918-
Reg == ARM::R8 || Reg == ARM::R9 ||
2919-
ARM::DPRRegClass.contains(Reg);
2920-
}),
2921-
CalleeSavedInfo(ARM::R12));
2947+
// PEI assigns frame indices in that order. That order depends on the
2948+
// PushPopSplitVariation, there are only two cases which we use with return
2949+
// address signing:
2950+
switch (STI.getPushPopSplitVariation(MF)) {
2951+
case ARMSubtarget::SplitR7:
2952+
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2953+
CSI.insert(find_if(CSI,
2954+
[=](const auto &CS) {
2955+
Register Reg = CS.getReg();
2956+
return Reg == ARM::R10 || Reg == ARM::R11 ||
2957+
Reg == ARM::R8 || Reg == ARM::R9 ||
2958+
ARM::DPRRegClass.contains(Reg);
2959+
}),
2960+
CalleeSavedInfo(ARM::R12));
2961+
break;
2962+
case ARMSubtarget::SplitR11AAPCSSignRA:
2963+
// With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
2964+
// on the stack.
2965+
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
2966+
break;
2967+
default:
2968+
llvm_unreachable("Unexpected CSR split with return address signing");
2969+
}
29222970
}
29232971

29242972
return false;

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
514514
F.needsUnwindTableEntry() &&
515515
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
516516
return SplitR11WindowsSEH;
517+
518+
// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
519+
// other in the list of callee saved registers in a frame, and branch
520+
// signing is enabled.
521+
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
522+
getFramePointerReg() == ARM::R11)
523+
return SplitR11AAPCSSignRA;
517524
return NoSplit;
518525
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
105105
/// vpush {d8-d15}
106106
/// push {r11, lr}
107107
SplitR11WindowsSEH,
108+
109+
/// When generating AAPCS-compilant frame chains, R11 is the frame pointer,
110+
/// and must be pushed adjacent to the return address (LR). Normally this
111+
/// isn't a problem, because the only register between them is r12, which is
112+
/// the intra-procedure-call scratch register, so doesn't need to be saved.
113+
/// However, when PACBTI is in use, r12 contains the authentication code, so
114+
/// does need to be saved. This means that we need a separate push for R11
115+
/// and LR.
116+
/// push {r0-r10, r12}
117+
/// push {r11, lr}
118+
/// vpush {d8-d15}
119+
SplitR11AAPCSSignRA,
108120
};
109121

110122
protected:

0 commit comments

Comments
 (0)