#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                         cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt<unsigned>
+    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+                          cl::Hidden);

// Whether to insert padding into non-streaming functions (for testing).
static cl::opt<bool>
    StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
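The new option decouples the analysis from the mitigation: aarch64-stack-hazard-remark-size only sets the distance threshold for remarks, while aarch64-stack-hazard-size continues to insert padding and, when non-zero, also overrides the remark threshold (see emitRemarks below). A hypothetical llc invocation to get remarks without changing frame layout, assuming the usual remark plumbing:

    llc -aarch64-stack-hazard-remark-size=1024 -pass-remarks-analysis=sme input.ll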
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
  const auto &MFI = MF.getFrameInfo();

  int64_t ObjectOffset = MFI.getObjectOffset(FI);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+
+  // For VLA-area objects, just emit an offset at the end of the stack frame.
+  // Whilst not quite correct, these objects do live at the end of the frame and
+  // so it is more useful for analysis for the offset to reflect this.
+  if (MFI.isVariableSizedObjectIndex(FI)) {
+    return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
+  }

  // This is correct in the absence of any SVE stack objects.
-  StackOffset SVEStackSize = getSVEStackSize(MF);
  if (!SVEStackSize)
    return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
  return true;
}

-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional<int> getLdStFrameID(const MachineInstr &MI,
-                                         const MachineFrameInfo &MFI) {
-  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
-    return std::nullopt;
-
-  MachineMemOperand *MMO = *MI.memoperands_begin();
+// Return the FrameID for a MMO.
+static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
+                                        const MachineFrameInfo &MFI) {
  auto *PSV =
      dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
  if (PSV)
@@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
  return std::nullopt;
}

+// Return the FrameID for a Load/Store instruction by looking at the first MMO.
+static std::optional<int> getLdStFrameID(const MachineInstr &MI,
+                                         const MachineFrameInfo &MFI) {
+  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+    return std::nullopt;
+
+  return getMMOFrameID(*MI.memoperands_begin(), MFI);
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
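This refactor keeps getLdStFrameID's old behaviour (it still looks only at the first memory operand) while exposing getMMOFrameID for callers that want to classify every frame access of an instruction. A minimal caller sketch, where visitFrameIndex is a hypothetical placeholder:

    // Classify every frame object MI touches, not just the first MMO
    // (this is the pattern emitRemarks below uses).
    for (MachineMemOperand *MMO : MI.memoperands())
      if (std::optional<int> FI = getMMOFrameID(MMO, MFI))
        visitFrameIndex(*FI); // hypothetical helper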
@@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
    MI->eraseFromParent();
  }
}
+
+struct StackAccess {
+  enum AccessType {
+    NotAccessed = 0, // Stack object not accessed by load/store instructions.
+    GPR = 1 << 0,    // A general purpose register.
+    PPR = 1 << 1,    // A predicate register.
+    FPR = 1 << 2,    // A floating point/Neon/SVE register.
+  };
+
+  int Idx;
+  StackOffset Offset;
+  int64_t Size;
+  unsigned AccessTypes;
+
+  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
+
+  bool operator<(const StackAccess &Rhs) const {
+    return std::make_tuple(start(), Idx) <
+           std::make_tuple(Rhs.start(), Rhs.Idx);
+  }
+
+  bool isCPU() const {
+    // Predicate register load and store instructions execute on the CPU.
+    return AccessTypes & (AccessType::GPR | AccessType::PPR);
+  }
+  bool isSME() const { return AccessTypes & AccessType::FPR; }
+  bool isMixed() const { return isCPU() && isSME(); }
+
+  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
+  int64_t end() const { return start() + Size; }
+
+  std::string getTypeString() const {
+    switch (AccessTypes) {
+    case AccessType::FPR:
+      return "FPR";
+    case AccessType::PPR:
+      return "PPR";
+    case AccessType::GPR:
+      return "GPR";
+    case AccessType::NotAccessed:
+      return "NA";
+    default:
+      return "Mixed";
+    }
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << getTypeString() << " stack object at [SP"
+       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
+    if (Offset.getScalable())
+      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
+         << " * vscale";
+    OS << "]";
+  }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
+  SA.print(OS);
+  return OS;
+}
+
+void AArch64FrameLowering::emitRemarks(
+    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
+
+  SMEAttrs Attrs(MF.getFunction());
+  if (Attrs.hasNonStreamingInterfaceAndBody())
+    return;
+
+  const uint64_t HazardSize =
+      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
+
+  if (HazardSize == 0)
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Bail if function has no stack objects.
+  if (!MFI.hasStackObjects())
+    return;
+
+  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
+
+  size_t NumFPLdSt = 0;
+  size_t NumNonFPLdSt = 0;
+
+  // Collect stack accesses via Load/Store instructions.
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+        continue;
+      for (MachineMemOperand *MMO : MI.memoperands()) {
+        std::optional<int> FI = getMMOFrameID(MMO, MFI);
+        if (FI && !MFI.isDeadObjectIndex(*FI)) {
+          int FrameIdx = *FI;
+
+          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
+          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
+            StackAccesses[ArrIdx].Idx = FrameIdx;
+            StackAccesses[ArrIdx].Offset =
+                getFrameIndexReferenceFromSP(MF, FrameIdx);
+            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
+          }
+
+          unsigned RegTy = StackAccess::AccessType::GPR;
+          if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+            if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+              RegTy = StackAccess::PPR;
+            else
+              RegTy = StackAccess::FPR;
+          } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
+            RegTy = StackAccess::FPR;
+          }
+
+          StackAccesses[ArrIdx].AccessTypes |= RegTy;
+
+          if (RegTy == StackAccess::FPR)
+            ++NumFPLdSt;
+          else
+            ++NumNonFPLdSt;
+        }
+      }
+    }
+  }
+
+  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
+    return;
+
+  llvm::sort(StackAccesses);
+  StackAccesses.erase(llvm::remove_if(StackAccesses,
+                                      [](const StackAccess &S) {
+                                        return S.AccessTypes ==
+                                               StackAccess::NotAccessed;
+                                      }),
+                      StackAccesses.end());
+
+  SmallVector<const StackAccess *> MixedObjects;
+  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
+
+  if (StackAccesses.front().isMixed())
+    MixedObjects.push_back(&StackAccesses.front());
+
+  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
+       It != End; ++It) {
+    const auto &First = *It;
+    const auto &Second = *(It + 1);
+
+    if (Second.isMixed())
+      MixedObjects.push_back(&Second);
+
+    if ((First.isSME() && Second.isCPU()) ||
+        (First.isCPU() && Second.isSME())) {
+      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
+      if (Distance < HazardSize)
+        HazardPairs.emplace_back(&First, &Second);
+    }
+  }
+
+  auto EmitRemark = [&](llvm::StringRef Str) {
+    ORE->emit([&]() {
+      auto R = MachineOptimizationRemarkAnalysis(
+          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
+      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
+    });
+  };
+
+  for (const auto &P : HazardPairs)
+    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
+
+  for (const auto *Obj : MixedObjects)
+    EmitRemark(
+        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
+}