Skip to content

[AMDGPU][NPM] Port GCNCreateVOPD to NPM #130059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into main from users/optimisan/preemit/port-gcn-create-vopd
Mar 14, 2025

Conversation

optimisan
Copy link
Contributor

No description provided.

Copy link

github-actions bot commented Mar 7, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@optimisan optimisan force-pushed the users/optimisan/preemit/port-gcn-create-vopd branch from 9ba0133 to 9d01cd5 Compare March 10, 2025 04:44
@optimisan optimisan changed the base branch from users/optimisan/postbb/port-patchable-function to users/optimisan/preemit/format-gcn-create-vopd March 10, 2025 04:59
@optimisan optimisan force-pushed the users/optimisan/preemit/format-gcn-create-vopd branch from 78bcc3a to 0ab8c65 Compare March 11, 2025 08:42
@optimisan optimisan force-pushed the users/optimisan/preemit/port-gcn-create-vopd branch from 9d01cd5 to 4ceba75 Compare March 11, 2025 08:43
@optimisan optimisan marked this pull request as ready for review March 11, 2025 09:03
@optimisan optimisan requested review from arsenm and cdevadas March 11, 2025 09:04
@optimisan optimisan requested a review from vikramRH March 11, 2025 09:04
@llvmbot
Copy link
Member

llvmbot commented Mar 11, 2025

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/130059.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+7-1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp (+119-100)
  • (modified) llvm/test/CodeGen/AMDGPU/vopd-combine.mir (+2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 00355d8fb5e5f..96f23432685de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -358,6 +358,12 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
   PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
 };
 
+class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &AM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -443,7 +449,7 @@ extern char &SIFormMemoryClausesID;
 void initializeSIPostRABundlerLegacyPass(PassRegistry &);
 extern char &SIPostRABundlerLegacyID;
 
-void initializeGCNCreateVOPDPass(PassRegistry &);
+void initializeGCNCreateVOPDLegacyPass(PassRegistry &);
 extern char &GCNCreateVOPDID;
 
 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 16ae23133a549..98b0bc7358e9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
+MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 856b5eb359c49..b06e87baa4ea9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -547,7 +547,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesLegacyPass(*PR);
   initializeSIPostRABundlerLegacyPass(*PR);
-  initializeGCNCreateVOPDPass(*PR);
+  initializeGCNCreateVOPDLegacyPass(*PR);
   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
   initializeAMDGPUAAWrapperPassPass(*PR);
   initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -2150,7 +2150,7 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
 
 void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
-    // TODO: addPass(GCNCreateVOPDPass());
+    addPass(GCNCreateVOPDPass());
   }
   // TODO: addPass(SIMemoryLegalizerPass());
   // TODO: addPass(SIInsertWaitcntsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 798279b279da3..22123f738c948 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "gcn-create-vopd"
@@ -36,7 +37,7 @@ using namespace llvm;
 
 namespace {
 
-class GCNCreateVOPD : public MachineFunctionPass {
+class GCNCreateVOPD {
 private:
   class VOPDCombineInfo {
   public:
@@ -48,126 +49,144 @@ class GCNCreateVOPD : public MachineFunctionPass {
     MachineInstr *SecondMI;
   };
 
-public:
-  static char ID;
-  const GCNSubtarget *ST = nullptr;
+  public:
+    const GCNSubtarget *ST = nullptr;
+
+    bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+      auto *FirstMI = CI.FirstMI;
+      auto *SecondMI = CI.SecondMI;
+      unsigned Opc1 = FirstMI->getOpcode();
+      unsigned Opc2 = SecondMI->getOpcode();
+      unsigned EncodingFamily =
+          AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+      int NewOpcode =
+          AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+                              AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+      assert(NewOpcode != -1 &&
+             "Should have previously determined this as a possible VOPD\n");
+
+      auto VOPDInst =
+          BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
+                  SII->get(NewOpcode))
+              .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+      namespace VOPD = AMDGPU::VOPD;
+      MachineInstr *MI[] = {FirstMI, SecondMI};
+      auto InstInfo =
+          AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+      for (auto CompIdx : VOPD::COMPONENTS) {
+        auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
+        VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+      }
 
-  GCNCreateVOPD() : MachineFunctionPass(ID) {}
+      for (auto CompIdx : VOPD::COMPONENTS) {
+        auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+        for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
+             ++CompSrcIdx) {
+          auto MCOprIdx =
+              InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+          VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+        }
+      }
 
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
+      SII->fixImplicitOperands(*VOPDInst);
+      for (auto CompIdx : VOPD::COMPONENTS)
+        VOPDInst.copyImplicitOps(*MI[CompIdx]);
 
-  StringRef getPassName() const override {
-    return "GCN Create VOPD Instructions";
-  }
+      LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+                        << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
 
-  bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
-    auto *FirstMI = CI.FirstMI;
-    auto *SecondMI = CI.SecondMI;
-    unsigned Opc1 = FirstMI->getOpcode();
-    unsigned Opc2 = SecondMI->getOpcode();
-    unsigned EncodingFamily =
-        AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
-    int NewOpcode =
-        AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
-                            AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
-    assert(NewOpcode != -1 &&
-           "Should have previously determined this as a possible VOPD\n");
-
-    auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
-                            FirstMI->getDebugLoc(), SII->get(NewOpcode))
-                        .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
-
-    namespace VOPD = AMDGPU::VOPD;
-    MachineInstr *MI[] = {FirstMI, SecondMI};
-    auto InstInfo =
-        AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
-    for (auto CompIdx : VOPD::COMPONENTS) {
-      auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
-      VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
-    }
+      for (auto CompIdx : VOPD::COMPONENTS)
+        MI[CompIdx]->eraseFromParent();
 
-    for (auto CompIdx : VOPD::COMPONENTS) {
-      auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
-      for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
-        auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
-        VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
-      }
+      ++NumVOPDCreated;
+      return true;
     }
 
-    SII->fixImplicitOperands(*VOPDInst);
-    for (auto CompIdx : VOPD::COMPONENTS)
-      VOPDInst.copyImplicitOps(*MI[CompIdx]);
+    bool run(MachineFunction &MF) {
+      ST = &MF.getSubtarget<GCNSubtarget>();
+      if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
+        return false;
+      LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
+
+      const SIInstrInfo *SII = ST->getInstrInfo();
+      bool Changed = false;
+
+      SmallVector<VOPDCombineInfo> ReplaceCandidates;
+
+      for (auto &MBB : MF) {
+        auto MII = MBB.begin(), E = MBB.end();
+        while (MII != E) {
+          auto *FirstMI = &*MII;
+          MII = next_nodbg(MII, MBB.end());
+          if (MII == MBB.end())
+            break;
+          if (FirstMI->isDebugInstr())
+            continue;
+          auto *SecondMI = &*MII;
+          unsigned Opc = FirstMI->getOpcode();
+          unsigned Opc2 = SecondMI->getOpcode();
+          llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
+          llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
+          VOPDCombineInfo CI;
+
+          if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
+            CI = VOPDCombineInfo(FirstMI, SecondMI);
+          else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
+            CI = VOPDCombineInfo(SecondMI, FirstMI);
+          else
+            continue;
+          // checkVOPDRegConstraints cares about program order, but doReplace
+          // cares about X-Y order in the constituted VOPD
+          if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
+            ReplaceCandidates.push_back(CI);
+            ++MII;
+          }
+        }
+      }
+      for (auto &CI : ReplaceCandidates) {
+        Changed |= doReplace(SII, CI);
+      }
 
-    LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
-                      << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+      return Changed;
+    }
+};
 
-    for (auto CompIdx : VOPD::COMPONENTS)
-      MI[CompIdx]->eraseFromParent();
+class GCNCreateVOPDLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {}
 
-    ++NumVOPDCreated;
-    return true;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
   }
 
+  StringRef getPassName() const override {
+    return "GCN Create VOPD Instructions";
+  }
   bool runOnMachineFunction(MachineFunction &MF) override {
     if (skipFunction(MF.getFunction()))
       return false;
-    ST = &MF.getSubtarget<GCNSubtarget>();
-    if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32())
-      return false;
-    LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n");
-
-    const SIInstrInfo *SII = ST->getInstrInfo();
-    bool Changed = false;
-
-    SmallVector<VOPDCombineInfo> ReplaceCandidates;
-
-    for (auto &MBB : MF) {
-      auto MII = MBB.begin(), E = MBB.end();
-      while (MII != E) {
-        auto *FirstMI = &*MII;
-        MII = next_nodbg(MII, MBB.end());
-        if (MII == MBB.end())
-          break;
-        if (FirstMI->isDebugInstr())
-          continue;
-        auto *SecondMI = &*MII;
-        unsigned Opc = FirstMI->getOpcode();
-        unsigned Opc2 = SecondMI->getOpcode();
-        llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
-        llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
-        VOPDCombineInfo CI;
-
-        if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
-          CI = VOPDCombineInfo(FirstMI, SecondMI);
-        else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
-          CI = VOPDCombineInfo(SecondMI, FirstMI);
-        else
-          continue;
-        // checkVOPDRegConstraints cares about program order, but doReplace
-        // cares about X-Y order in the constituted VOPD
-        if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
-          ReplaceCandidates.push_back(CI);
-          ++MII;
-        }
-      }
-    }
-    for (auto &CI : ReplaceCandidates) {
-      Changed |= doReplace(SII, CI);
-    }
 
-    return Changed;
+    return GCNCreateVOPD().run(MF);
   }
 };
 
 } // namespace
 
-char GCNCreateVOPD::ID = 0;
+PreservedAnalyses
+llvm::GCNCreateVOPDPass::run(MachineFunction &MF,
+                             MachineFunctionAnalysisManager &AM) {
+  if (!GCNCreateVOPD().run(MF))
+    return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
+}
+
+char GCNCreateVOPDLegacy::ID = 0;
 
-char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID;
+char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID;
 
-INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions",
+INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions",
                 false, false)
diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
index 8d5060177c63d..5a13401c1631c 100644
--- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir
@@ -4,6 +4,8 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
 
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s
+
 --- |
   @lds = external addrspace(3) global [8 x i8]
   define void @vopd_schedule() { ret void }

@optimisan optimisan force-pushed the users/optimisan/preemit/format-gcn-create-vopd branch from 0ab8c65 to 5185a14 Compare March 12, 2025 06:00
@optimisan optimisan force-pushed the users/optimisan/preemit/port-gcn-create-vopd branch from 4d7120d to 1aeebc2 Compare March 12, 2025 06:03
Base automatically changed from users/optimisan/preemit/format-gcn-create-vopd to main March 12, 2025 09:03
@optimisan optimisan force-pushed the users/optimisan/preemit/port-gcn-create-vopd branch from 1aeebc2 to 63c0f40 Compare March 12, 2025 09:41
@optimisan optimisan force-pushed the users/optimisan/preemit/port-gcn-create-vopd branch from 63c0f40 to 1e64f09 Compare March 12, 2025 09:43
@optimisan optimisan merged commit f34385d into main Mar 14, 2025
11 checks passed
@optimisan optimisan deleted the users/optimisan/preemit/port-gcn-create-vopd branch March 14, 2025 04:52
frederik-h pushed a commit to frederik-h/llvm-project that referenced this pull request Mar 18, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants