@@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
199
199
// push {r0-r10, r12} GPRCS1
200
200
// vpush {d8-d15} DPRCS1
201
201
// push {r11, lr} GPRCS2
202
+ //
203
+ // SplitR11AAPCSSignRA:
204
+ // push {r0-r10, r12} GPRCS1
205
+ // push {r11, lr} GPRCS2
206
+ // vpush {d8-d15} DPRCS1
202
207
203
208
// If FPCXTNS is spilled (for CMSE secure entry functions), it is always at
204
209
// the top of the stack frame.
@@ -238,7 +243,8 @@ SpillArea getSpillArea(Register Reg,
238
243
return SpillArea::GPRCS1;
239
244
240
245
case ARM::LR:
241
- if (Variation == ARMSubtarget::SplitR11WindowsSEH)
246
+ if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
247
+ Variation == ARMSubtarget::SplitR11AAPCSSignRA)
242
248
return SpillArea::GPRCS2;
243
249
else
244
250
return SpillArea::GPRCS1;
@@ -827,6 +833,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
827
833
// This is a conservative estimation: Assume the frame pointer being r7 and
828
834
// pc("r15") up to r8 getting spilled before (= 8 registers).
829
835
int MaxRegBytes = 8 * 4 ;
836
+ if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
837
+ // Here, r11 can be stored below all of r4-r15.
838
+ MaxRegBytes = 11 * 4 ;
830
839
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
831
840
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
832
841
MaxRegBytes = 11 * 4 + 8 * 8 ;
@@ -899,17 +908,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
899
908
}
900
909
901
910
// Determine spill area sizes, and some important frame indices.
911
+ SpillArea FramePtrSpillArea;
912
+ bool BeforeFPPush = true ;
902
913
for (const CalleeSavedInfo &I : CSI) {
903
914
Register Reg = I.getReg ();
904
915
int FI = I.getFrameIdx ();
905
916
906
- if (Reg == FramePtr)
917
+ SpillArea Area = getSpillArea (Reg, PushPopSplit,
918
+ AFI->getNumAlignedDPRCS2Regs (), RegInfo);
919
+
920
+ if (Reg == FramePtr) {
907
921
FramePtrSpillFI = FI;
922
+ FramePtrSpillArea = Area;
923
+ }
908
924
if (Reg == ARM::D8)
909
925
D8SpillFI = FI;
910
926
911
- switch (getSpillArea (Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs (),
912
- RegInfo)) {
927
+ switch (Area) {
913
928
case SpillArea::FPCXT:
914
929
FPCXTSaveSize += 4 ;
915
930
break ;
@@ -936,21 +951,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
936
951
// Move past FPCXT area.
937
952
if (FPCXTSaveSize > 0 ) {
938
953
LastPush = MBBI++;
939
- DefCFAOffsetCandidates.addInst (LastPush, FPCXTSaveSize, true );
954
+ DefCFAOffsetCandidates.addInst (LastPush, FPCXTSaveSize, BeforeFPPush );
940
955
}
941
956
942
957
// Allocate the vararg register save area.
943
958
if (ArgRegsSaveSize) {
944
959
emitSPUpdate (isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
945
960
MachineInstr::FrameSetup);
946
961
LastPush = std::prev (MBBI);
947
- DefCFAOffsetCandidates.addInst (LastPush, ArgRegsSaveSize, true );
962
+ DefCFAOffsetCandidates.addInst (LastPush, ArgRegsSaveSize, BeforeFPPush );
948
963
}
949
964
950
965
// Move past area 1.
951
966
if (GPRCS1Size > 0 ) {
952
967
GPRCS1Push = LastPush = MBBI++;
953
- DefCFAOffsetCandidates.addInst (LastPush, GPRCS1Size, true );
968
+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS1Size, BeforeFPPush);
969
+ if (FramePtrSpillArea == SpillArea::GPRCS1)
970
+ BeforeFPPush = false ;
954
971
}
955
972
956
973
// Determine starting offsets of spill areas. These offsets are all positive
@@ -974,21 +991,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
974
991
} else {
975
992
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
976
993
}
977
- int FramePtrOffsetInPush = 0 ;
978
994
if (HasFP) {
979
995
// Offset from the CFA to the saved frame pointer, will be negative.
980
996
int FPOffset = MFI.getObjectOffset (FramePtrSpillFI);
981
997
LLVM_DEBUG (dbgs () << " FramePtrSpillFI: " << FramePtrSpillFI
982
998
<< " , FPOffset: " << FPOffset << " \n " );
983
999
assert (getMaxFPOffset (STI, *AFI, MF) <= FPOffset &&
984
1000
" Max FP estimation is wrong" );
985
- // Offset from the top of the GPRCS1 area to the saved frame pointer, will
986
- // be negative.
987
- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
988
- LLVM_DEBUG (dbgs () << " FramePtrOffsetInPush=" << FramePtrOffsetInPush
989
- << " , FramePtrSpillOffset="
990
- << (MFI.getObjectOffset (FramePtrSpillFI) + NumBytes)
991
- << " \n " );
992
1001
AFI->setFramePtrSpillOffset (MFI.getObjectOffset (FramePtrSpillFI) +
993
1002
NumBytes);
994
1003
}
@@ -1000,7 +1009,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
1000
1009
// after DPRCS1.
1001
1010
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1002
1011
GPRCS2Push = LastPush = MBBI++;
1003
- DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
1012
+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size, BeforeFPPush);
1013
+ if (FramePtrSpillArea == SpillArea::GPRCS2)
1014
+ BeforeFPPush = false ;
1004
1015
}
1005
1016
1006
1017
// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
@@ -1013,7 +1024,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
1013
1024
else {
1014
1025
emitSPUpdate (isARM, MBB, MBBI, dl, TII, -DPRGapSize,
1015
1026
MachineInstr::FrameSetup);
1016
- DefCFAOffsetCandidates.addInst (std::prev (MBBI), DPRGapSize);
1027
+ DefCFAOffsetCandidates.addInst (std::prev (MBBI), DPRGapSize, BeforeFPPush );
1017
1028
}
1018
1029
}
1019
1030
@@ -1022,7 +1033,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
1022
1033
// Since vpush register list cannot have gaps, there may be multiple vpush
1023
1034
// instructions in the prologue.
1024
1035
while (MBBI != MBB.end () && MBBI->getOpcode () == ARM::VSTMDDB_UPD) {
1025
- DefCFAOffsetCandidates.addInst (MBBI, sizeOfSPAdjustment (*MBBI));
1036
+ DefCFAOffsetCandidates.addInst (MBBI, sizeOfSPAdjustment (*MBBI), BeforeFPPush );
1026
1037
LastPush = MBBI++;
1027
1038
}
1028
1039
}
@@ -1041,7 +1052,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
1041
1052
// Move GPRCS2, if using SplitR11WindowsSEH.
1042
1053
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1043
1054
GPRCS2Push = LastPush = MBBI++;
1044
- DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size);
1055
+ DefCFAOffsetCandidates.addInst (LastPush, GPRCS2Size, BeforeFPPush);
1056
+ if (FramePtrSpillArea == SpillArea::GPRCS2)
1057
+ BeforeFPPush = false ;
1045
1058
}
1046
1059
1047
1060
bool NeedsWinCFIStackAlloc = NeedsWinCFI;
@@ -1142,28 +1155,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
1142
1155
// into spill area 1, including the FP in R11. In either case, it
1143
1156
// is in area one and the adjustment needs to take place just after
1144
1157
// that push.
1145
- // FIXME: The above is not necessary true when PACBTI is enabled.
1146
- // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1147
- // so FP ends up on area two.
1148
1158
MachineBasicBlock::iterator AfterPush;
1149
1159
if (HasFP) {
1150
- AfterPush = std::next (GPRCS1Push);
1151
- unsigned PushSize = sizeOfSPAdjustment (*GPRCS1Push);
1152
- int FPOffset = PushSize + FramePtrOffsetInPush;
1153
- if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
1154
- AfterPush = std::next (GPRCS2Push);
1155
- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1156
- FramePtr, ARM::SP, 0 , MachineInstr::FrameSetup);
1157
- } else {
1158
- emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1159
- FramePtr, ARM::SP, FPOffset,
1160
- MachineInstr::FrameSetup);
1160
+ MachineBasicBlock::iterator FPPushInst;
1161
+ // Offset from SP immediately after the push which saved the FP to the FP
1162
+ // save slot.
1163
+ int64_t FPOffsetAfterPush;
1164
+ switch (FramePtrSpillArea) {
1165
+ case SpillArea::GPRCS1:
1166
+ FPPushInst = GPRCS1Push;
1167
+ FPOffsetAfterPush = MFI.getObjectOffset (FramePtrSpillFI) +
1168
+ ArgRegsSaveSize + FPCXTSaveSize +
1169
+ sizeOfSPAdjustment (*FPPushInst);
1170
+ LLVM_DEBUG (dbgs () << " Frame pointer in GPRCS1, offset "
1171
+ << FPOffsetAfterPush << " after that push\n " );
1172
+ break ;
1173
+ case SpillArea::GPRCS2:
1174
+ FPPushInst = GPRCS2Push;
1175
+ FPOffsetAfterPush = MFI.getObjectOffset (FramePtrSpillFI) +
1176
+ ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
1177
+ sizeOfSPAdjustment (*FPPushInst);
1178
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1179
+ FPOffsetAfterPush += DPRCSSize + DPRGapSize;
1180
+ LLVM_DEBUG (dbgs () << " Frame pointer in GPRCS2, offset "
1181
+ << FPOffsetAfterPush << " after that push\n " );
1182
+ break ;
1183
+ default :
1184
+ llvm_unreachable (" frame pointer in unknown spill area" );
1185
+ break ;
1161
1186
}
1187
+ AfterPush = std::next (FPPushInst);
1188
+ if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
1189
+ assert (FPOffsetAfterPush == 0 );
1190
+
1191
+ // Emit the MOV or ADD to set up the frame pointer register.
1192
+ emitRegPlusImmediate (!AFI->isThumbFunction (), MBB, AfterPush, dl, TII,
1193
+ FramePtr, ARM::SP, FPOffsetAfterPush,
1194
+ MachineInstr::FrameSetup);
1195
+
1162
1196
if (!NeedsWinCFI) {
1163
- if (FramePtrOffsetInPush + PushSize != 0 ) {
1197
+ // Emit DWARF info to find the CFA using the frame pointer from this
1198
+ // point onward.
1199
+ if (FPOffsetAfterPush != 0 ) {
1164
1200
unsigned CFIIndex = MF.addFrameInst (MCCFIInstruction::cfiDefCfa (
1165
1201
nullptr , MRI->getDwarfRegNum (FramePtr, true ),
1166
- FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush ));
1202
+ -MFI. getObjectOffset (FramePtrSpillFI) ));
1167
1203
BuildMI (MBB, AfterPush, dl, TII.get (TargetOpcode::CFI_INSTRUCTION))
1168
1204
.addCFIIndex (CFIIndex)
1169
1205
.setMIFlags (MachineInstr::FrameSetup);
@@ -1675,7 +1711,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1675
1711
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1676
1712
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore () == 0 &&
1677
1713
STI.hasV5TOps () && MBB.succ_empty () && !hasPAC &&
1678
- PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
1714
+ (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
1715
+ PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
1679
1716
Reg = ARM::PC;
1680
1717
// Fold the return instruction into the LDM.
1681
1718
DeleteRet = true ;
@@ -2907,18 +2944,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
2907
2944
const auto &AFI = *MF.getInfo <ARMFunctionInfo>();
2908
2945
if (AFI.shouldSignReturnAddress ()) {
2909
2946
// The order of register must match the order we push them, because the
2910
- // PEI assigns frame indices in that order. When compiling for return
2911
- // address sign and authenication, we use split push, therefore the orders
2912
- // we want are:
2913
- // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2914
- CSI.insert (find_if (CSI,
2915
- [=](const auto &CS) {
2916
- Register Reg = CS.getReg ();
2917
- return Reg == ARM::R10 || Reg == ARM::R11 ||
2918
- Reg == ARM::R8 || Reg == ARM::R9 ||
2919
- ARM::DPRRegClass.contains (Reg);
2920
- }),
2921
- CalleeSavedInfo (ARM::R12));
2947
+ // PEI assigns frame indices in that order. That order depends on the
2948
+ // PushPopSplitVariation; there are only two cases which we use with return
2949
+ // address signing:
2950
+ switch (STI.getPushPopSplitVariation (MF)) {
2951
+ case ARMSubtarget::SplitR7:
2952
+ // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
2953
+ CSI.insert (find_if (CSI,
2954
+ [=](const auto &CS) {
2955
+ Register Reg = CS.getReg ();
2956
+ return Reg == ARM::R10 || Reg == ARM::R11 ||
2957
+ Reg == ARM::R8 || Reg == ARM::R9 ||
2958
+ ARM::DPRRegClass.contains (Reg);
2959
+ }),
2960
+ CalleeSavedInfo (ARM::R12));
2961
+ break ;
2962
+ case ARMSubtarget::SplitR11AAPCSSignRA:
2963
+ // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
2964
+ // on the stack.
2965
+ CSI.insert (CSI.begin (), CalleeSavedInfo (ARM::R12));
2966
+ break ;
2967
+ default :
2968
+ llvm_unreachable (" Unexpected CSR split with return address signing" );
2969
+ }
2922
2970
}
2923
2971
2924
2972
return false ;
0 commit comments