Skip to content

Commit e5419a8

Browse files
MacDue and hazzlim
authored and committed
[AArch64] Add getStreamingHazardSize() to AArch64Subtarget (llvm#113679)
This is defined by the `-aarch64-streaming-hazard-size` option or its alias `-aarch64-stack-hazard-size` (the original name). It has been renamed to be more general as this option will (for the time being) be used to detect if the current target has streaming mode memory hazards. --------- Co-authored-by: Hari Limaye <[email protected]>
1 parent d3e5d5d commit e5419a8

File tree

3 files changed

+34
-12
lines changed

3 files changed

+34
-12
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,15 @@
126126
// and the SME unit try to access the same area of memory, including if the
127127
// access is to an area of the stack. To try to alleviate this we attempt to
128128
// introduce extra padding into the stack frame between FP and GPR accesses,
129-
// controlled by the StackHazardSize option. Without changing the layout of the
130-
// stack frame in the diagram above, a stack object of size StackHazardSize is
131-
// added between GPR and FPR CSRs. Another is added to the stack objects
132-
// section, and stack objects are sorted so that FPR > Hazard padding slot >
133-
// GPRs (where possible). Unfortunately some things are not handled well (VLA
134-
// area, arguments on the stack, object with both GPR and FPR accesses), but if
135-
// those are controlled by the user then the entire stack frame becomes GPR at
136-
// the start/end with FPR in the middle, surrounded by Hazard padding.
129+
// controlled by the aarch64-stack-hazard-size option. Without changing the
130+
// layout of the stack frame in the diagram above, a stack object of size
131+
// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added
132+
// to the stack objects section, and stack objects are sorted so that FPR >
133+
// Hazard padding slot > GPRs (where possible). Unfortunately some things are
134+
// not handled well (VLA area, arguments on the stack, objects with both GPR and
135+
// FPR accesses), but if those are controlled by the user then the entire stack
136+
// frame becomes GPR at the start/end with FPR in the middle, surrounded by
137+
// Hazard padding.
137138
//
138139
// An example of the prologue:
139140
//
@@ -273,9 +274,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
273274
cl::desc("Emit homogeneous prologue and epilogue for the size "
274275
"optimization (default = off)"));
275276

276-
// Stack hazard padding size. 0 = disabled.
277-
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
278-
cl::init(0), cl::Hidden);
279277
// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
280278
static cl::opt<unsigned>
281279
StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
@@ -1617,6 +1615,10 @@ static bool isTargetWindows(const MachineFunction &MF) {
16171615
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
16181616
}
16191617

1618+
// Convenience wrapper: returns the streaming hazard size reported by MF's
// AArch64Subtarget (getStreamingHazardSize()). The frame-lowering code in this
// file uses the result as its stack hazard padding size; 0 means hazard
// padding is disabled.
static unsigned getStackHazardSize(const MachineFunction &MF) {
1619+
return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
1620+
}
1621+
16201622
// Convenience function to determine whether I is an SVE callee save.
16211623
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
16221624
switch (I->getOpcode()) {
@@ -2988,6 +2990,7 @@ static void computeCalleeSaveRegisterPairs(
29882990
bool IsWindows = isTargetWindows(MF);
29892991
bool NeedsWinCFI = needsWinCFI(MF);
29902992
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2993+
unsigned StackHazardSize = getStackHazardSize(MF);
29912994
MachineFrameInfo &MFI = MF.getFrameInfo();
29922995
CallingConv::ID CC = MF.getFunction().getCallingConv();
29932996
unsigned Count = CSI.size();
@@ -3615,6 +3618,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
36153618
// which can be used to determine if any hazard padding is needed.
36163619
void AArch64FrameLowering::determineStackHazardSlot(
36173620
MachineFunction &MF, BitVector &SavedRegs) const {
3621+
unsigned StackHazardSize = getStackHazardSize(MF);
36183622
if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
36193623
MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
36203624
return;
@@ -3805,7 +3809,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
38053809
// StackHazardSize if so.
38063810
determineStackHazardSlot(MF, SavedRegs);
38073811
if (AFI->hasStackHazardSlotIndex())
3808-
CSStackSize += StackHazardSize;
3812+
CSStackSize += getStackHazardSize(MF);
38093813

38103814
// Save number of saved regs, so we can easily update CSStackSize later.
38113815
unsigned NumSavedRegs = SavedRegs.count();
@@ -3920,6 +3924,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
39203924
std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
39213925
unsigned &MaxCSFrameIndex) const {
39223926
bool NeedsWinCFI = needsWinCFI(MF);
3927+
unsigned StackHazardSize = getStackHazardSize(MF);
39233928
// To match the canonical windows frame layout, reverse the list of
39243929
// callee saved registers to get them laid out by PrologEpilogInserter
39253930
// in the right order. (PrologEpilogInserter allocates stack objects top
@@ -5154,6 +5159,7 @@ void AArch64FrameLowering::emitRemarks(
51545159
if (Attrs.hasNonStreamingInterfaceAndBody())
51555160
return;
51565161

5162+
unsigned StackHazardSize = getStackHazardSize(MF);
51575163
const uint64_t HazardSize =
51585164
(StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
51595165

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
7676
"aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
7777
cl::desc("Set minimum number of entries to use a jump table on AArch64"));
7878

79+
// Hidden command-line option giving the hazard size for streaming mode memory
// accesses. Defaults to 0 (disabled). The AArch64Subtarget constructor copies
// this value into its StreamingHazardSize member.
static cl::opt<unsigned> AArch64StreamingHazardSize(
80+
"aarch64-streaming-hazard-size",
81+
cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
82+
cl::init(0), cl::Hidden);
83+
84+
// Keeps the option's original name, -aarch64-stack-hazard-size, working by
// forwarding it to AArch64StreamingHazardSize.
static cl::alias AArch64StreamingStackHazardSize(
85+
"aarch64-stack-hazard-size",
86+
cl::desc("alias for -aarch64-streaming-hazard-size"),
87+
cl::aliasopt(AArch64StreamingHazardSize));
88+
7989
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
8090
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
8191
return OverrideVectorInsertExtractBaseCost;
@@ -333,6 +343,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
333343
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
334344
IsLittle(LittleEndian), IsStreaming(IsStreaming),
335345
IsStreamingCompatible(IsStreamingCompatible),
346+
StreamingHazardSize(AArch64StreamingHazardSize),
336347
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
337348
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
338349
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
8484

8585
bool IsStreaming;
8686
bool IsStreamingCompatible;
87+
unsigned StreamingHazardSize;
8788
unsigned MinSVEVectorSizeInBits;
8889
unsigned MaxSVEVectorSizeInBits;
8990
unsigned VScaleForTuning = 2;
@@ -172,6 +173,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
172173
/// Returns true if the function has a streaming-compatible body.
173174
bool isStreamingCompatible() const { return IsStreamingCompatible; }
174175

176+
/// Returns the size of the memory region that, if accessed by both the CPU
177+
/// and the SME unit, could result in a hazard. 0 = disabled.
178+
unsigned getStreamingHazardSize() const { return StreamingHazardSize; }
179+
175180
/// Returns true if the target has NEON and the function at runtime is known
176181
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
177182
/// mode, which disables NEON instructions).

0 commit comments

Comments
 (0)