33
33
#include " llvm/ADT/Sequence.h"
34
34
#include " llvm/Analysis/AliasAnalysis.h"
35
35
#include " llvm/CodeGen/MachineLoopInfo.h"
36
+ #include " llvm/CodeGen/MachinePassManager.h"
36
37
#include " llvm/CodeGen/MachinePostDominators.h"
37
38
#include " llvm/Support/DebugCounter.h"
38
39
#include " llvm/TargetParser/TargetParser.h"
@@ -597,7 +598,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
597
598
AMDGPU::Waitcnt getAllZeroWaitcnt (bool IncludeVSCnt) const override ;
598
599
};
599
600
600
- class SIInsertWaitcnts : public MachineFunctionPass {
601
+ class SIInsertWaitcnts {
601
602
private:
602
603
const GCNSubtarget *ST = nullptr ;
603
604
const SIInstrInfo *TII = nullptr ;
@@ -636,9 +637,9 @@ class SIInsertWaitcnts : public MachineFunctionPass {
636
637
InstCounterType MaxCounter = NUM_NORMAL_INST_CNTS;
637
638
638
639
public:
639
- static char ID;
640
-
641
- SIInsertWaitcnts () : MachineFunctionPass(ID ) {
640
+ SIInsertWaitcnts (MachineLoopInfo *MLI, MachinePostDominatorTree *PDT,
641
+ AliasAnalysis *AA)
642
+ : MLI(MLI), PDT(PDT), AA(AA ) {
642
643
(void )ForceExpCounter;
643
644
(void )ForceLgkmCounter;
644
645
(void )ForceVMCounter;
@@ -648,20 +649,7 @@ class SIInsertWaitcnts : public MachineFunctionPass {
648
649
bool isPreheaderToFlush (MachineBasicBlock &MBB,
649
650
WaitcntBrackets &ScoreBrackets);
650
651
bool isVMEMOrFlatVMEM (const MachineInstr &MI) const ;
651
- bool runOnMachineFunction (MachineFunction &MF) override ;
652
-
653
- StringRef getPassName () const override {
654
- return " SI insert wait instructions" ;
655
- }
656
-
657
- void getAnalysisUsage (AnalysisUsage &AU) const override {
658
- AU.setPreservesCFG ();
659
- AU.addRequired <MachineLoopInfoWrapperPass>();
660
- AU.addRequired <MachinePostDominatorTreeWrapperPass>();
661
- AU.addUsedIfAvailable <AAResultsWrapperPass>();
662
- AU.addPreserved <AAResultsWrapperPass>();
663
- MachineFunctionPass::getAnalysisUsage (AU);
664
- }
652
+ bool run (MachineFunction &MF);
665
653
666
654
bool isForceEmitWaitcnt () const {
667
655
for (auto T : inst_counter_types ())
@@ -749,6 +737,27 @@ class SIInsertWaitcnts : public MachineFunctionPass {
749
737
WaitcntBrackets &ScoreBrackets);
750
738
};
751
739
740
+ class SIInsertWaitcntsLegacy : public MachineFunctionPass {
741
+ public:
742
+ static char ID;
743
+ SIInsertWaitcntsLegacy () : MachineFunctionPass(ID) {}
744
+
745
+ bool runOnMachineFunction (MachineFunction &MF) override ;
746
+
747
+ StringRef getPassName () const override {
748
+ return " SI insert wait instructions" ;
749
+ }
750
+
751
+ void getAnalysisUsage (AnalysisUsage &AU) const override {
752
+ AU.setPreservesCFG ();
753
+ AU.addRequired <MachineLoopInfoWrapperPass>();
754
+ AU.addRequired <MachinePostDominatorTreeWrapperPass>();
755
+ AU.addUsedIfAvailable <AAResultsWrapperPass>();
756
+ AU.addPreserved <AAResultsWrapperPass>();
757
+ MachineFunctionPass::getAnalysisUsage (AU);
758
+ }
759
+ };
760
+
752
761
} // end anonymous namespace
753
762
754
763
RegInterval WaitcntBrackets::getRegInterval (const MachineInstr *MI,
@@ -1133,19 +1142,19 @@ bool WaitcntBrackets::counterOutOfOrder(InstCounterType T) const {
1133
1142
return hasMixedPendingEvents (T);
1134
1143
}
1135
1144
1136
// Legacy pass registration, shown as a diff: '-' lines are the previous text
// and '+' lines are the replacement. The registered class changes from
// SIInsertWaitcnts to SIInsertWaitcntsLegacy; the two declared dependencies
// (MachineLoopInfoWrapperPass, MachinePostDominatorTreeWrapperPass) are
// unchanged context lines.
- INITIALIZE_PASS_BEGIN (SIInsertWaitcnts , DEBUG_TYPE, " SI Insert Waitcnts" , false ,
1137
- false )
1145
+ INITIALIZE_PASS_BEGIN (SIInsertWaitcntsLegacy , DEBUG_TYPE, " SI Insert Waitcnts" ,
1146
+ false , false )
1138
1147
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
1139
1148
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
1140
- INITIALIZE_PASS_END(SIInsertWaitcnts , DEBUG_TYPE, " SI Insert Waitcnts" , false ,
1141
- false )
1149
+ INITIALIZE_PASS_END(SIInsertWaitcntsLegacy , DEBUG_TYPE, " SI Insert Waitcnts" ,
1150
+ false , false )
1142
1151
1143
// Storage for the pass ID and the llvm::SIInsertWaitcntsID reference bound to
// it. Diff view: '-' lines are the previous text, '+' lines the replacement;
// both now name SIInsertWaitcntsLegacy instead of SIInsertWaitcnts.
- char SIInsertWaitcnts ::ID = 0;
1152
+ char SIInsertWaitcntsLegacy ::ID = 0;
1144
1153
1145
- char &llvm::SIInsertWaitcntsID = SIInsertWaitcnts ::ID;
1154
+ char &llvm::SIInsertWaitcntsID = SIInsertWaitcntsLegacy ::ID;
1146
1155
1147
1156
// Factory returning a newly allocated pass object as a FunctionPass pointer.
// Diff view: the '-' line is the previous body (constructing
// SIInsertWaitcnts), the '+' line is its replacement (constructing
// SIInsertWaitcntsLegacy).
FunctionPass *llvm::createSIInsertWaitcntsPass () {
1148
- return new SIInsertWaitcnts ();
1157
+ return new SIInsertWaitcntsLegacy ();
1149
1158
}
1150
1159
1151
1160
static bool updateOperandIfDifferent (MachineInstr &MI, AMDGPU::OpName OpName,
@@ -2481,16 +2490,40 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
2481
2490
return HasVMemLoad && UsesVgprLoadedOutside && ST->hasVmemWriteVgprInOrder ();
2482
2491
}
2483
2492
2484
- bool SIInsertWaitcnts::runOnMachineFunction (MachineFunction &MF) {
2493
+ bool SIInsertWaitcntsLegacy::runOnMachineFunction (MachineFunction &MF) {
2494
+ auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI ();
2495
+ auto *PDT =
2496
+ &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree ();
2497
+ AliasAnalysis *AA = nullptr ;
2498
+ if (auto *AAR = getAnalysisIfAvailable<AAResultsWrapperPass>())
2499
+ AA = &AAR->getAAResults ();
2500
+
2501
+ return SIInsertWaitcnts (MLI, PDT, AA).run (MF);
2502
+ }
2503
+
2504
+ PreservedAnalyses
2505
+ SIInsertWaitcntsPass::run (MachineFunction &MF,
2506
+ MachineFunctionAnalysisManager &MFAM) {
2507
+ auto *MLI = &MFAM.getResult <MachineLoopAnalysis>(MF);
2508
+ auto *PDT = &MFAM.getResult <MachinePostDominatorTreeAnalysis>(MF);
2509
+ auto *AA = MFAM.getResult <FunctionAnalysisManagerMachineFunctionProxy>(MF)
2510
+ .getManager ()
2511
+ .getCachedResult <AAManager>(MF.getFunction ());
2512
+
2513
+ if (!SIInsertWaitcnts (MLI, PDT, AA).run (MF))
2514
+ return PreservedAnalyses::all ();
2515
+
2516
+ return getMachineFunctionPassPreservedAnalyses ()
2517
+ .preserveSet <CFGAnalyses>()
2518
+ .preserve <AAManager>();
2519
+ }
2520
+
2521
+ bool SIInsertWaitcnts::run (MachineFunction &MF) {
2485
2522
ST = &MF.getSubtarget <GCNSubtarget>();
2486
2523
TII = ST->getInstrInfo ();
2487
2524
TRI = &TII->getRegisterInfo ();
2488
2525
MRI = &MF.getRegInfo ();
2489
2526
const SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
2490
- MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI ();
2491
- PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree ();
2492
- if (auto *AAR = getAnalysisIfAvailable<AAResultsWrapperPass>())
2493
- AA = &AAR->getAAResults ();
2494
2527
2495
2528
AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion (ST->getCPU ());
2496
2529
0 commit comments