@@ -327,7 +327,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
327
327
static bool produceCompactUnwindFrame (MachineFunction &MF);
328
328
static bool needsWinCFI (const MachineFunction &MF);
329
329
static StackOffset getSVEStackSize (const MachineFunction &MF);
330
- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB);
330
+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB, bool HasCall=false );
331
+ static bool requiresSaveVG (const MachineFunction &MF);
331
332
332
333
/// Returns true if a homogeneous prolog or epilog code can be emitted
333
334
/// for the size optimization. If possible, a frame helper call is injected.
@@ -1002,6 +1003,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
1002
1003
}
1003
1004
}
1004
1005
1006
/// Reports whether a Windows stack probe is required for a frame of
/// \p StackSizeInBytes bytes in \p MF. The result is true only when the
/// subtarget targets Windows, the function info reports stack probing, and
/// the size is at least the function's stack probe size.
+ static bool windowsRequiresStackProbe (const MachineFunction &MF,
1007
+ uint64_t StackSizeInBytes) {
1008
+ const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
1009
+ const AArch64FunctionInfo &MFI = *MF.getInfo <AArch64FunctionInfo>();
1010
+ // TODO: When implementing stack protectors, take that into account
1011
+ // for the probe threshold.
1012
// The probe size is converted to uint64_t so both sides of the threshold
// comparison are unsigned 64-bit values.
+ return Subtarget.isTargetWindows () && MFI.hasStackProbing () &&
1013
+ StackSizeInBytes >= uint64_t (MFI.getStackProbeSize ());
1014
+ }
1015
+
1005
1016
static void getLiveRegsForEntryMBB (LivePhysRegs &LiveRegs,
1006
1017
const MachineBasicBlock &MBB) {
1007
1018
const MachineFunction *MF = MBB.getParent ();
@@ -1023,7 +1034,7 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
1023
1034
// but we would then have to make sure that we were in fact saving at least one
1024
1035
// callee-save register in the prologue, which is additional complexity that
1025
1036
// doesn't seem worth the benefit.
1026
- static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB) {
1037
+ static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB, bool HasCall ) {
1027
1038
MachineFunction *MF = MBB->getParent ();
1028
1039
1029
1040
// If MBB is an entry block, use X9 as the scratch register
@@ -1037,6 +1048,11 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
1037
1048
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo ();
1038
1049
LivePhysRegs LiveRegs (TRI);
1039
1050
getLiveRegsForEntryMBB (LiveRegs, *MBB);
1051
+ if (HasCall) {
1052
+ LiveRegs.addReg (AArch64::X16);
1053
+ LiveRegs.addReg (AArch64::X17);
1054
+ LiveRegs.addReg (AArch64::X18);
1055
+ }
1040
1056
1041
1057
// Prefer X9 since it was historically used for the prologue scratch reg.
1042
1058
const MachineRegisterInfo &MRI = MF->getRegInfo ();
@@ -1077,23 +1093,16 @@ bool AArch64FrameLowering::canUseAsPrologue(
1077
1093
MBB.isLiveIn (AArch64::NZCV))
1078
1094
return false ;
1079
1095
1080
- // Don't need a scratch register if we're not going to re-align the stack or
1081
- // emit stack probes.
1082
- if (!RegInfo->hasStackRealignment (*MF) && !TLI->hasInlineStackProbe (*MF))
1083
- return true ;
1084
- // Otherwise, we can use any block as long as it has a scratch register
1085
- // available.
1086
- return findScratchNonCalleeSaveRegister (TmpMBB) != AArch64::NoRegister;
1087
- }
1096
+ if (RegInfo->hasStackRealignment (*MF) || TLI->hasInlineStackProbe (*MF))
1097
+ if (findScratchNonCalleeSaveRegister (TmpMBB) == AArch64::NoRegister)
1098
+ return false ;
1088
1099
1089
- static bool windowsRequiresStackProbe (MachineFunction &MF,
1090
- uint64_t StackSizeInBytes) {
1091
- const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
1092
- const AArch64FunctionInfo &MFI = *MF.getInfo <AArch64FunctionInfo>();
1093
- // TODO: When implementing stack protectors, take that into account
1094
- // for the probe threshold.
1095
- return Subtarget.isTargetWindows () && MFI.hasStackProbing () &&
1096
- StackSizeInBytes >= uint64_t (MFI.getStackProbeSize ());
1100
+ // May need a scratch register (for the return value) if a special call is required.
1101
+ if (requiresSaveVG (*MF) || windowsRequiresStackProbe (*MF, std::numeric_limits<uint64_t >::max ()))
1102
+ if (findScratchNonCalleeSaveRegister (TmpMBB, true ) == AArch64::NoRegister)
1103
+ return false ;
1104
+
1105
+ return true ;
1097
1106
}
1098
1107
1099
1108
static bool needsWinCFI (const MachineFunction &MF) {
@@ -1356,8 +1365,8 @@ bool requiresGetVGCall(MachineFunction &MF) {
1356
1365
!MF.getSubtarget <AArch64Subtarget>().hasSVE ();
1357
1366
}
1358
1367
1359
- static bool requiresSaveVG (MachineFunction &MF) {
1360
- AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1368
+ static bool requiresSaveVG (const MachineFunction &MF) {
1369
+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
1361
1370
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
1362
1371
// is enabled with streaming mode changes.
1363
1372
if (!AFI->hasStreamingModeChanges ())
@@ -1991,8 +2000,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
1991
2000
return STI.getRegisterInfo ()->isSuperOrSubRegisterEq (
1992
2001
AArch64::X15, LiveIn.PhysReg );
1993
2002
})) {
1994
- X15Scratch = findScratchNonCalleeSaveRegister (&MBB);
1995
- assert (X15Scratch != AArch64::NoRegister);
2003
+ X15Scratch = findScratchNonCalleeSaveRegister (&MBB, true );
2004
+ assert (X15Scratch != AArch64::NoRegister && (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17) );
1996
2005
#ifndef NDEBUG
1997
2006
LiveRegs.removeReg (AArch64::X15); // ignore X15 since we restore it
1998
2007
#endif
@@ -3236,7 +3245,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
3236
3245
unsigned X0Scratch = AArch64::NoRegister;
3237
3246
if (Reg1 == AArch64::VG) {
3238
3247
// Find an available register to store value of VG to.
3239
- Reg1 = findScratchNonCalleeSaveRegister (&MBB);
3248
+ Reg1 = findScratchNonCalleeSaveRegister (&MBB, true );
3240
3249
assert (Reg1 != AArch64::NoRegister);
3241
3250
SMEAttrs Attrs (MF.getFunction ());
3242
3251
0 commit comments