|
126 | 126 | // and the SME unit try to access the same area of memory, including if the
|
127 | 127 | // access is to an area of the stack. To try to alleviate this we attempt to
|
128 | 128 | // introduce extra padding into the stack frame between FP and GPR accesses,
|
129 |
| -// controlled by the StackHazardSize option. Without changing the layout of the |
130 |
| -// stack frame in the diagram above, a stack object of size StackHazardSize is |
131 |
| -// added between GPR and FPR CSRs. Another is added to the stack objects |
132 |
| -// section, and stack objects are sorted so that FPR > Hazard padding slot > |
133 |
| -// GPRs (where possible). Unfortunately some things are not handled well (VLA |
134 |
| -// area, arguments on the stack, object with both GPR and FPR accesses), but if |
135 |
| -// those are controlled by the user then the entire stack frame becomes GPR at |
136 |
| -// the start/end with FPR in the middle, surrounded by Hazard padding. |
| 129 | +// controlled by the aarch64-stack-hazard-size option. Without changing the |
| 130 | +// layout of the stack frame in the diagram above, a stack object of size |
| 131 | +// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added |
| 132 | +// to the stack objects section, and stack objects are sorted so that FPR > |
| 133 | +// Hazard padding slot > GPRs (where possible). Unfortunately some things are |
| 134 | +// not handled well (VLA area, arguments on the stack, objects with both GPR and |
| 135 | +// FPR accesses), but if those are controlled by the user then the entire stack |
| 136 | +// frame becomes GPR at the start/end with FPR in the middle, surrounded by |
| 137 | +// Hazard padding. |
137 | 138 | //
|
138 | 139 | // An example of the prologue:
|
139 | 140 | //
|
@@ -273,9 +274,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
|
273 | 274 | cl::desc("Emit homogeneous prologue and epilogue for the size "
|
274 | 275 | "optimization (default = off)"));
|
275 | 276 |
|
276 |
| -// Stack hazard padding size. 0 = disabled. |
277 |
| -static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size", |
278 |
| - cl::init(0), cl::Hidden); |
279 | 277 | // Stack hazard size for analysis remarks. aarch64-stack-hazard-size takes precedence.
|
280 | 278 | static cl::opt<unsigned>
|
281 | 279 | StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
|
@@ -1617,6 +1615,10 @@ static bool isTargetWindows(const MachineFunction &MF) {
|
1617 | 1615 | return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
|
1618 | 1616 | }
|
1619 | 1617 |
|
| 1618 | +static unsigned getStackHazardSize(const MachineFunction &MF) { |
| 1619 | + return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize(); |
| 1620 | +} |
| 1621 | + |
1620 | 1622 | // Convenience function to determine whether I is an SVE callee save.
|
1621 | 1623 | static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
|
1622 | 1624 | switch (I->getOpcode()) {
|
@@ -2988,6 +2990,7 @@ static void computeCalleeSaveRegisterPairs(
|
2988 | 2990 | bool IsWindows = isTargetWindows(MF);
|
2989 | 2991 | bool NeedsWinCFI = needsWinCFI(MF);
|
2990 | 2992 | AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
| 2993 | + unsigned StackHazardSize = getStackHazardSize(MF); |
2991 | 2994 | MachineFrameInfo &MFI = MF.getFrameInfo();
|
2992 | 2995 | CallingConv::ID CC = MF.getFunction().getCallingConv();
|
2993 | 2996 | unsigned Count = CSI.size();
|
@@ -3615,6 +3618,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
|
3615 | 3618 | // which can be used to determine if any hazard padding is needed.
|
3616 | 3619 | void AArch64FrameLowering::determineStackHazardSlot(
|
3617 | 3620 | MachineFunction &MF, BitVector &SavedRegs) const {
|
| 3621 | + unsigned StackHazardSize = getStackHazardSize(MF); |
3618 | 3622 | if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
|
3619 | 3623 | MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
|
3620 | 3624 | return;
|
@@ -3805,7 +3809,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
3805 | 3809 | // StackHazardSize if so.
|
3806 | 3810 | determineStackHazardSlot(MF, SavedRegs);
|
3807 | 3811 | if (AFI->hasStackHazardSlotIndex())
|
3808 |
| - CSStackSize += StackHazardSize; |
| 3812 | + CSStackSize += getStackHazardSize(MF); |
3809 | 3813 |
|
3810 | 3814 | // Save number of saved regs, so we can easily update CSStackSize later.
|
3811 | 3815 | unsigned NumSavedRegs = SavedRegs.count();
|
@@ -3920,6 +3924,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
|
3920 | 3924 | std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
|
3921 | 3925 | unsigned &MaxCSFrameIndex) const {
|
3922 | 3926 | bool NeedsWinCFI = needsWinCFI(MF);
|
| 3927 | + unsigned StackHazardSize = getStackHazardSize(MF); |
3923 | 3928 | // To match the canonical windows frame layout, reverse the list of
|
3924 | 3929 | // callee saved registers to get them laid out by PrologEpilogInserter
|
3925 | 3930 | // in the right order. (PrologEpilogInserter allocates stack objects top
|
@@ -5154,6 +5159,7 @@ void AArch64FrameLowering::emitRemarks(
|
5154 | 5159 | if (Attrs.hasNonStreamingInterfaceAndBody())
|
5155 | 5160 | return;
|
5156 | 5161 |
|
| 5162 | + unsigned StackHazardSize = getStackHazardSize(MF); |
5157 | 5163 | const uint64_t HazardSize =
|
5158 | 5164 | (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
|
5159 | 5165 |
|
|
0 commit comments