Skip to content

Commit d0aa2c4

Browse files
committed
choose scratch register more carefully
1 parent 4053196 commit d0aa2c4

File tree

1 file changed

+32
-23
lines changed

1 file changed

+32
-23
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 32 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
327327
static bool produceCompactUnwindFrame(MachineFunction &MF);
328328
static bool needsWinCFI(const MachineFunction &MF);
329329
static StackOffset getSVEStackSize(const MachineFunction &MF);
330-
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
330+
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, bool HasCall=false);
331+
static bool requiresSaveVG(const MachineFunction &MF);
331332

332333
/// Returns true if a homogeneous prolog or epilog code can be emitted
333334
/// for the size optimization. If possible, a frame helper call is injected.
@@ -1002,6 +1003,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
10021003
}
10031004
}
10041005

1006+
/// Returns true when all three of the following hold: the subtarget reports
/// a Windows target, the function's AArch64FunctionInfo reports stack probing,
/// and \p StackSizeInBytes is at least the function's stack probe size.
///
/// \param MF               Function whose subtarget and function info are queried.
/// \param StackSizeInBytes Stack size compared against the probe size.
static bool windowsRequiresStackProbe(const MachineFunction &MF,
1007+
uint64_t StackSizeInBytes) {
1008+
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1009+
const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
1010+
// TODO: When implementing stack protectors, take that into account
1011+
// for the probe threshold.
1012+
return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
1013+
StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
1014+
}
1015+
10051016
static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
10061017
const MachineBasicBlock &MBB) {
10071018
const MachineFunction *MF = MBB.getParent();
@@ -1023,7 +1034,7 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
10231034
// but we would then have to make sure that we were in fact saving at least one
10241035
// callee-save register in the prologue, which is additional complexity that
10251036
// doesn't seem worth the benefit.
1026-
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
1037+
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, bool HasCall) {
10271038
MachineFunction *MF = MBB->getParent();
10281039

10291040
// If MBB is an entry block, use X9 as the scratch register
@@ -1037,6 +1048,11 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
10371048
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
10381049
LivePhysRegs LiveRegs(TRI);
10391050
getLiveRegsForEntryMBB(LiveRegs, *MBB);
1051+
if (HasCall) {
1052+
LiveRegs.addReg(AArch64::X16);
1053+
LiveRegs.addReg(AArch64::X17);
1054+
LiveRegs.addReg(AArch64::X18);
1055+
}
10401056

10411057
// Prefer X9 since it was historically used for the prologue scratch reg.
10421058
const MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -1077,23 +1093,16 @@ bool AArch64FrameLowering::canUseAsPrologue(
10771093
MBB.isLiveIn(AArch64::NZCV))
10781094
return false;
10791095

1080-
// Don't need a scratch register if we're not going to re-align the stack or
1081-
// emit stack probes.
1082-
if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF))
1083-
return true;
1084-
// Otherwise, we can use any block as long as it has a scratch register
1085-
// available.
1086-
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
1087-
}
1096+
if (RegInfo->hasStackRealignment(*MF) || TLI->hasInlineStackProbe(*MF))
1097+
if (findScratchNonCalleeSaveRegister(TmpMBB) == AArch64::NoRegister)
1098+
return false;
10881099

1089-
static bool windowsRequiresStackProbe(MachineFunction &MF,
1090-
uint64_t StackSizeInBytes) {
1091-
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1092-
const AArch64FunctionInfo &MFI = *MF.getInfo<AArch64FunctionInfo>();
1093-
// TODO: When implementing stack protectors, take that into account
1094-
// for the probe threshold.
1095-
return Subtarget.isTargetWindows() && MFI.hasStackProbing() &&
1096-
StackSizeInBytes >= uint64_t(MFI.getStackProbeSize());
1100+
// May need a scratch register (for the return value) if a special call is required.
1101+
if (requiresSaveVG(*MF) || windowsRequiresStackProbe(*MF, std::numeric_limits<uint64_t>::max()))
1102+
if (findScratchNonCalleeSaveRegister(TmpMBB, true) == AArch64::NoRegister)
1103+
return false;
1104+
1105+
return true;
10971106
}
10981107

10991108
static bool needsWinCFI(const MachineFunction &MF) {
@@ -1356,8 +1365,8 @@ bool requiresGetVGCall(MachineFunction &MF) {
13561365
!MF.getSubtarget<AArch64Subtarget>().hasSVE();
13571366
}
13581367

1359-
static bool requiresSaveVG(MachineFunction &MF) {
1360-
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1368+
static bool requiresSaveVG(const MachineFunction &MF) {
1369+
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
13611370
// For Darwin platforms we don't save VG for non-SVE functions, even if SME
13621371
// is enabled with streaming mode changes.
13631372
if (!AFI->hasStreamingModeChanges())
@@ -1991,8 +2000,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19912000
return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
19922001
AArch64::X15, LiveIn.PhysReg);
19932002
})) {
1994-
X15Scratch = findScratchNonCalleeSaveRegister(&MBB);
1995-
assert(X15Scratch != AArch64::NoRegister);
2003+
X15Scratch = findScratchNonCalleeSaveRegister(&MBB, true);
2004+
assert(X15Scratch != AArch64::NoRegister && (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17));
19962005
#ifndef NDEBUG
19972006
LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
19982007
#endif
@@ -3236,7 +3245,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
32363245
unsigned X0Scratch = AArch64::NoRegister;
32373246
if (Reg1 == AArch64::VG) {
32383247
// Find an available register to store value of VG to.
3239-
Reg1 = findScratchNonCalleeSaveRegister(&MBB);
3248+
Reg1 = findScratchNonCalleeSaveRegister(&MBB, true);
32403249
assert(Reg1 != AArch64::NoRegister);
32413250
SMEAttrs Attrs(MF.getFunction());
32423251

0 commit comments

Comments
 (0)