Skip to content

Commit 8529238

Browse files
authored
[AMDGPU][NewPM] Port AMDGPUInsertDelayAlu to NPM (#128003)
1 parent 31897e6 commit 8529238

File tree

6 files changed

+85
-21
lines changed

6 files changed

+85
-21
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,12 @@ struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
270270
ScanOptions ScanImpl;
271271
};
272272

273+
/// New pass manager wrapper for the AMDGPU insert-delay-ALU pass. It only
/// declares run(), which operates on a MachineFunction and receives the
/// machine-function analysis manager.
struct AMDGPUInsertDelayAluPass
274+
: public PassInfoMixin<AMDGPUInsertDelayAluPass> {
275+
PreservedAnalyses run(MachineFunction &F,
276+
MachineFunctionAnalysisManager &MFAM);
277+
};
278+
273279
Pass *createAMDGPUStructurizeCFGPass();
274280
FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
275281
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
@@ -416,7 +422,7 @@ extern char &SIMemoryLegalizerID;
416422
void initializeSIModeRegisterPass(PassRegistry&);
417423
extern char &SIModeRegisterID;
418424

419-
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
425+
void initializeAMDGPUInsertDelayAluLegacyPass(PassRegistry &);
420426
extern char &AMDGPUInsertDelayAluID;
421427

422428
void initializeSIInsertHardClausesPass(PassRegistry &);

llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp

+34-18
Original file line numberDiff line numberDiff line change
@@ -23,22 +23,13 @@ using namespace llvm;
2323

2424
namespace {
2525

26-
class AMDGPUInsertDelayAlu : public MachineFunctionPass {
26+
class AMDGPUInsertDelayAlu {
2727
public:
28-
static char ID;
29-
3028
const SIInstrInfo *SII;
3129
const TargetRegisterInfo *TRI;
3230

3331
const TargetSchedModel *SchedModel;
3432

35-
AMDGPUInsertDelayAlu() : MachineFunctionPass(ID) {}
36-
37-
void getAnalysisUsage(AnalysisUsage &AU) const override {
38-
AU.setPreservesCFG();
39-
MachineFunctionPass::getAnalysisUsage(AU);
40-
}
41-
4233
// Return true if MI waits for all outstanding VALU instructions to complete.
4334
static bool instructionWaitsForVALU(const MachineInstr &MI) {
4435
// These instruction types wait for VA_VDST==0 before issuing.
@@ -416,10 +407,7 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
416407
return Changed;
417408
}
418409

419-
bool runOnMachineFunction(MachineFunction &MF) override {
420-
if (skipFunction(MF.getFunction()))
421-
return false;
422-
410+
bool run(MachineFunction &MF) {
423411
LLVM_DEBUG(dbgs() << "AMDGPUInsertDelayAlu running on " << MF.getName()
424412
<< "\n");
425413

@@ -454,11 +442,39 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
454442
}
455443
};
456444

445+
/// Legacy pass manager wrapper. It holds the pass ID and forwards the actual
/// work to a freshly constructed AMDGPUInsertDelayAlu implementation object.
class AMDGPUInsertDelayAluLegacy : public MachineFunctionPass {
446+
public:
447+
static char ID;
448+
449+
AMDGPUInsertDelayAluLegacy() : MachineFunctionPass(ID) {}
450+
451+
// Declares the CFG as preserved, then defers to the base class for the rest.
void getAnalysisUsage(AnalysisUsage &AU) const override {
452+
AU.setPreservesCFG();
453+
MachineFunctionPass::getAnalysisUsage(AU);
454+
}
455+
456+
// Returns false without touching MF when skipFunction() asks to skip it;
// otherwise returns whatever the shared implementation's run() reports.
bool runOnMachineFunction(MachineFunction &MF) override {
457+
if (skipFunction(MF.getFunction()))
458+
return false;
459+
AMDGPUInsertDelayAlu Impl;
460+
return Impl.run(MF);
461+
}
462+
};
457463
} // namespace
458464

459-
char AMDGPUInsertDelayAlu::ID = 0;
465+
/// New pass manager entry point: runs the shared AMDGPUInsertDelayAlu
/// implementation on \p MF and translates its boolean result into a
/// PreservedAnalyses set. \p MFAM is not used in this body.
PreservedAnalyses
466+
AMDGPUInsertDelayAluPass::run(MachineFunction &MF,
467+
MachineFunctionAnalysisManager &MFAM) {
468+
// The implementation reported no change, so every analysis stays valid.
if (!AMDGPUInsertDelayAlu().run(MF))
469+
return PreservedAnalyses::all();
470+
// Something changed: start from the set returned by
// getMachineFunctionPassPreservedAnalyses() and additionally mark the
// CFGAnalyses set as preserved.
auto PA = getMachineFunctionPassPreservedAnalyses();
471+
PA.preserveSet<CFGAnalyses>();
472+
return PA;
473+
}
474+
475+
char AMDGPUInsertDelayAluLegacy::ID = 0;
460476

461-
char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAlu::ID;
477+
char &llvm::AMDGPUInsertDelayAluID = AMDGPUInsertDelayAluLegacy::ID;
462478

463-
INITIALIZE_PASS(AMDGPUInsertDelayAlu, DEBUG_TYPE, "AMDGPU Insert Delay ALU",
464-
false, false)
479+
INITIALIZE_PASS(AMDGPUInsertDelayAluLegacy, DEBUG_TYPE,
480+
"AMDGPU Insert Delay ALU", false, false)

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ FUNCTION_PASS_WITH_PARAMS(
9696
#ifndef MACHINE_FUNCTION_PASS
9797
#define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
9898
#endif
99+
MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
99100
MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
100101
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
101102
MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
@@ -122,7 +123,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
122123
#undef MACHINE_FUNCTION_PASS
123124

124125
#define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
125-
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
126126
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
127127
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
128128
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

+41-1
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
533533
initializeAMDGPURewriteUndefForPHILegacyPass(*PR);
534534
initializeAMDGPUUnifyMetadataPass(*PR);
535535
initializeSIAnnotateControlFlowLegacyPass(*PR);
536-
initializeAMDGPUInsertDelayAluPass(*PR);
536+
initializeAMDGPUInsertDelayAluLegacyPass(*PR);
537537
initializeSIInsertHardClausesPass(*PR);
538538
initializeSIInsertWaitcntsPass(*PR);
539539
initializeSIModeRegisterPass(*PR);
@@ -2147,6 +2147,46 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
21472147
Base::addPostRegAlloc(addPass);
21482148
}
21492149

2150+
/// Adds the pre-emit machine passes for the new pass manager pipeline.
/// Most entries are still TODO placeholders for passes that are not added
/// here yet; the only passes actually added are AMDGPUWaitSGPRHazardsPass and,
/// when enabled via EnableInsertDelayAlu, AMDGPUInsertDelayAluPass.
void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
2151+
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
2152+
// TODO: addPass(GCNCreateVOPDPass());
2153+
}
2154+
// TODO: addPass(SIMemoryLegalizerPass());
2155+
// TODO: addPass(SIInsertWaitcntsPass());
2156+
2157+
// TODO: addPass(SIModeRegisterPass());
2158+
2159+
if (TM.getOptLevel() > CodeGenOptLevel::None) {
2160+
// TODO: addPass(SIInsertHardClausesPass());
2161+
}
2162+
2163+
// TODO: addPass(SILateBranchLoweringPass());
2164+
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
2165+
// TODO: addPass(AMDGPUSetWavePriorityPass());
2166+
}
2167+
2168+
if (TM.getOptLevel() > CodeGenOptLevel::None) {
2169+
// TODO: addPass(SIPreEmitPeepholePass());
2170+
}
2171+
2172+
// The hazard recognizer that runs as part of the post-ra scheduler does not
2173+
// guarantee to be able to handle all hazards correctly. This is because if
2174+
// there are multiple scheduling regions in a basic block, the regions are
2175+
// scheduled bottom up, so when we begin to schedule a region we don't know
2176+
// what instructions were emitted directly before it.
2177+
//
2178+
// Here we add a stand-alone hazard recognizer pass which can handle all
2179+
// cases.
2180+
// TODO: addPass(PostRAHazardRecognizerPass());
2181+
addPass(AMDGPUWaitSGPRHazardsPass());
2182+
2183+
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
2184+
addPass(AMDGPUInsertDelayAluPass());
2185+
}
2186+
2187+
// TODO: addPass(BranchRelaxationPass());
2188+
}
2189+
21502190
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
21512191
CodeGenOptLevel Level) const {
21522192
if (Opt.getNumOccurrences())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

+1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ class AMDGPUCodeGenPassBuilder
180180
void addPreRewrite(AddMachinePass &) const;
181181
void addMachineSSAOptimization(AddMachinePass &) const;
182182
void addPostRegAlloc(AddMachinePass &) const;
183+
void addPreEmitPass(AddMachinePass &) const;
183184

184185
/// Check if a pass is enabled given \p Opt option. The option always
185186
/// overrides defaults if explicitly used. Otherwise its default will be used

llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -passes=amdgpu-insert-delay-alu %s -o - | FileCheck %s
34

45
---
56
name: valu_dep_1

0 commit comments

Comments
 (0)