-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][NPM] Port GCNCreateVOPD to NPM #130059
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This was referenced Mar 6, 2025
This was referenced Mar 6, 2025
c9386f1
to
399b997
Compare
d82b6dd
to
9ba0133
Compare
✅ With the latest revision this PR passed the C/C++ code formatter.
9ba0133
to
9d01cd5
Compare
78bcc3a
to
0ab8c65
Compare
9d01cd5
to
4ceba75
Compare
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-amdgpu
Author: Akshat Oke (optimisan)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/130059.diff
5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 00355d8fb5e5f..96f23432685de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -358,6 +358,12 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
};
+class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &AM);
+};
+
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -443,7 +449,7 @@ extern char &SIFormMemoryClausesID;
void initializeSIPostRABundlerLegacyPass(PassRegistry &);
extern char &SIPostRABundlerLegacyID;
-void initializeGCNCreateVOPDPass(PassRegistry &);
+void initializeGCNCreateVOPDLegacyPass(PassRegistry &);
extern char &GCNCreateVOPDID;
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 16ae23133a549..98b0bc7358e9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse
MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
+MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 856b5eb359c49..b06e87baa4ea9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -547,7 +547,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesLegacyPass(*PR);
initializeSIPostRABundlerLegacyPass(*PR);
- initializeGCNCreateVOPDPass(*PR);
+ initializeGCNCreateVOPDLegacyPass(*PR);
initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
initializeAMDGPUAAWrapperPassPass(*PR);
initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -2150,7 +2150,7 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
- // TODO: addPass(GCNCreateVOPDPass());
+ addPass(GCNCreateVOPDPass());
}
// TODO: addPass(SIMemoryLegalizerPass());
// TODO: addPass(SIInsertWaitcntsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 798279b279da3..22123f738c948 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "gcn-create-vopd"
@@ -36,7 +37,7 @@ using namespace llvm;
namespace {
-class GCNCreateVOPD : public MachineFunctionPass {
+class GCNCreateVOPD {
private:
class VOPDCombineInfo {
public:
@@ -48,126 +49,144 @@ class GCNCreateVOPD : public MachineFunctionPass {
MachineInstr *SecondMI;
};
-public:
- static char ID;
- const GCNSubtarget *ST = nullptr;
+ public:
+ const GCNSubtarget *ST = nullptr;
+
+ bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+ auto *FirstMI = CI.FirstMI;
+ auto *SecondMI = CI.SecondMI;
+ unsigned Opc1 = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ unsigned EncodingFamily =
+ AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+ int NewOpcode =
+ AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+ AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+ assert(NewOpcode != -1 &&
+ "Should have previously determined this as a possible VOPD\n");
+
+ auto VOPDInst =
+ BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
+ SII->get(NewOpcode))
+ .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+ namespace VOPD = AMDGPU::VOPD;
+ MachineInstr *MI[] = {FirstMI, SecondMI};
+ auto InstInfo =
+ AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+ for (auto CompIdx : VOPD::COMPONENTS) {
+ auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
+ VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+ }
- GCNCreateVOPD() : MachineFunctionPass(ID) {}
+ for (auto CompIdx : VOPD::COMPONENTS) {
+ auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+ for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
+ ++CompSrcIdx) {
+ auto MCOprIdx =
+ InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+ VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+ }
+ }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
+ SII->fixImplicitOperands(*VOPDInst);
+ for (auto CompIdx : VOPD::COMPONENTS)
+ VOPDInst.copyImplicitOps(*MI[CompIdx]);
- StringRef getPassName() const override {
- return "GCN Create VOPD Instructions";
- }
+ LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+ << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
- bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
- auto *FirstMI = CI.FirstMI;
- auto *SecondMI = CI.SecondMI;
- unsigned Opc1 = FirstMI->getOpcode();
- unsigned Opc2 = SecondMI->getOpcode();
- unsigned EncodingFamily =
- AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
- int NewOpcode =
- AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
- AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
- assert(NewOpcode != -1 &&
- "Should have previously determined this as a possible VOPD\n");
-
- auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
- FirstMI->getDebugLoc(), SII->get(NewOpcode))
- .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
-
- namespace VOPD = AMDGPU::VOPD;
- MachineInstr *MI[] = {FirstMI, SecondMI};
- auto InstInfo =
- AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
- for (auto CompIdx : VOPD::COMPONENTS) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
- VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
- }
+ for (auto CompIdx : VOPD::COMPONENTS)
+ MI[CompIdx]->eraseFromParent();
- for (auto CompIdx : VOPD::COMPONENTS) {
- auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
- for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
- auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
- VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
- }
+ ++NumVOPDCreated;
+ return true;
}
- SII->fixImplicitOperands(*VOPDInst);
- for (auto CompIdx : VOPD::COMPONENTS)
- VOPDInst.copyImplicitOps(*MI[CompIdx]);
+ bool run(MachineFunction &MF) {
+ ST = &MF.getSubtarget<GCNSubtarget>();
+ if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+ return false;
+ LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+ const SIInstrInfo *SII = ST->getInstrInfo();
+ bool Changed = false;
+
+ SmallVector<VOPDCombineInfo> ReplaceCandidates;
+
+ for (auto &MBB : MF) {
+ auto MII = MBB.begin(), E = MBB.end();
+ while (MII != E) {
+ auto *FirstMI = &*MII;
+ MII = next_nodbg(MII, MBB.end());
+ if (MII == MBB.end())
+ break;
+ if (FirstMI->isDebugInstr())
+ continue;
+ auto *SecondMI = &*MII;
+ unsigned Opc = FirstMI->getOpcode();
+ unsigned Opc2 = SecondMI->getOpcode();
+ llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+ llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+ VOPDCombineInfo CI;
+
+ if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+ CI = VOPDCombineInfo(FirstMI, SecondMI);
+ else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+ CI = VOPDCombineInfo(SecondMI, FirstMI);
+ else
+ continue;
+ // checkVOPDRegConstraints cares about program order, but doReplace
+ // cares about X-Y order in the constituted VOPD
+ if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+ ReplaceCandidates.push_back(CI);
+ ++MII;
+ }
+ }
+ }
+ for (auto &CI : ReplaceCandidates) {
+ Changed |= doReplace(SII, CI);
+ }
- LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
- << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+ return Changed;
+ }
+};
- for (auto CompIdx : VOPD::COMPONENTS)
- MI[CompIdx]->eraseFromParent();
+class GCNCreateVOPDLegacy : public MachineFunctionPass {
+public:
+ static char ID;
+ GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
- ++NumVOPDCreated;
- return true;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
+ StringRef getPassName() const override {
+ return "GCN Create VOPD Instructions";
+ }
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;
- ST = &MF.getSubtarget<GCNSubtarget>();
- if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
- return false;
- LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
-
- const SIInstrInfo *SII = ST->getInstrInfo();
- bool Changed = false;
-
- SmallVector<VOPDCombineInfo> ReplaceCandidates;
-
- for (auto &MBB : MF) {
- auto MII = MBB.begin(), E = MBB.end();
- while (MII != E) {
- auto *FirstMI = &*MII;
- MII = next_nodbg(MII, MBB.end());
- if (MII == MBB.end())
- break;
- if (FirstMI->isDebugInstr())
- continue;
- auto *SecondMI = &*MII;
- unsigned Opc = FirstMI->getOpcode();
- unsigned Opc2 = SecondMI->getOpcode();
- llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
- llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
- VOPDCombineInfo CI;
-
- if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
- CI = VOPDCombineInfo(FirstMI, SecondMI);
- else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
- CI = VOPDCombineInfo(SecondMI, FirstMI);
- else
- continue;
- // checkVOPDRegConstraints cares about program order, but doReplace
- // cares about X-Y order in the constituted VOPD
- if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
- ReplaceCandidates.push_back(CI);
- ++MII;
- }
- }
- }
- for (auto &CI : ReplaceCandidates) {
- Changed |= doReplace(SII, CI);
- }
- return Changed;
+ return GCNCreateVOPD().run(MF);
}
};
} // namespace
-char GCNCreateVOPD::ID = 0;
+PreservedAnalyses
+llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &AM) {
+ if (!GCNCreateVOPD().run(MF))
+ return PreservedAnalyses::all();
+ return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
+}
+
+char GCNCreateVOPDLegacy::ID = 0;
-char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
+char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;
-INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
+INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
false, false)
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 8d5060177c63d..5a13401c1631c 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -4,6 +4,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
+
--- |
@lds = external addrspace(3) global [8 x i8]
define void @vopd_schedule() { ret void }
|
arsenm
reviewed
Mar 11, 2025
0ab8c65
to
5185a14
Compare
4d7120d
to
1aeebc2
Compare
arsenm
approved these changes
Mar 12, 2025
Base automatically changed from users/optimisan/preemit/format-gcn-create-vopd to main on March 12, 2025 09:03
1aeebc2
to
63c0f40
Compare
63c0f40
to
1e64f09
Compare
frederik-h pushed a commit to frederik-h/llvm-project that referenced this pull request on Mar 18, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.