Skip to content

[AMDGPU][NewPM] Port SIOptimizeExecMasking to NPM #123572

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 20, 2025

Conversation

optimisan
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Jan 20, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/123572.diff

8 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+10-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1)
  • (modified) llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp (+31-8)
  • (added) llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h (+23)
  • (modified) llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir (+2)
  • (modified) llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir (+1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 89356df39724a4..5d9a830f041a74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -216,8 +216,8 @@ extern char &SIPreEmitPeepholeID;
 void initializeSILateBranchLoweringPass(PassRegistry &);
 extern char &SILateBranchLoweringPassID;
 
-void initializeSIOptimizeExecMaskingPass(PassRegistry &);
-extern char &SIOptimizeExecMaskingID;
+void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
+extern char &SIOptimizeExecMaskingLegacyID;
 
 void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
 extern char &SIPreAllocateWWMRegsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fbcf83e2fdd60b..09a39d23d801b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -105,6 +105,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 98268b848f5ce6..53ec80b8f72049 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -42,6 +42,7 @@
 #include "SILowerSGPRSpills.h"
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
+#include "SIOptimizeExecMasking.h"
 #include "SIOptimizeVGPRLiveRange.h"
 #include "SIPeepholeSDWA.h"
 #include "SIPreAllocateWWMRegs.h"
@@ -528,7 +529,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
   initializeSIMemoryLegalizerPass(*PR);
-  initializeSIOptimizeExecMaskingPass(*PR);
+  initializeSIOptimizeExecMaskingLegacyPass(*PR);
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesPass(*PR);
   initializeSIPostRABundlerPass(*PR);
@@ -1634,7 +1635,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
 void GCNPassConfig::addPostRegAlloc() {
   addPass(&SIFixVGPRCopiesID);
   if (getOptLevel() > CodeGenOptLevel::None)
-    addPass(&SIOptimizeExecMaskingID);
+    addPass(&SIOptimizeExecMaskingLegacyID);
   TargetPassConfig::addPostRegAlloc();
 }
 
@@ -2105,6 +2106,13 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
   addPass(SIShrinkInstructionsPass());
 }
 
+void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
+  // addPass(SIFixVGPRCopiesID);
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+    addPass(SIOptimizeExecMaskingPass());
+  Base::addPostRegAlloc(addPass);
+}
+
 bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
                                              CodeGenOptLevel Level) const {
   if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..24b4da3a68f67e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,7 @@ class AMDGPUCodeGenPassBuilder
   void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
   Error addInstSelector(AddMachinePass &) const;
   void addMachineSSAOptimization(AddMachinePass &) const;
+  void addPostRegAlloc(AddMachinePass &) const;
 
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 93b70fa4ba974c..3fb8d5b560496b 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "SIOptimizeExecMasking.h"
 #include "AMDGPU.h"
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -23,7 +24,7 @@ using namespace llvm;
 
 namespace {
 
-class SIOptimizeExecMasking : public MachineFunctionPass {
+class SIOptimizeExecMasking {
   MachineFunction *MF = nullptr;
   const GCNSubtarget *ST = nullptr;
   const SIRegisterInfo *TRI = nullptr;
@@ -61,11 +62,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
   void tryRecordOrSaveexecXorSequence(MachineInstr &MI);
   bool optimizeOrSaveexecXorSequences();
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class SIOptimizeExecMaskingLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  SIOptimizeExecMasking() : MachineFunctionPass(ID) {
-    initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry());
+  SIOptimizeExecMaskingLegacy() : MachineFunctionPass(ID) {
+    initializeSIOptimizeExecMaskingLegacyPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -82,15 +88,28 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
 
 } // End anonymous namespace.
 
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
+PreservedAnalyses
+SIOptimizeExecMaskingPass::run(MachineFunction &MF,
+                               MachineFunctionAnalysisManager &) {
+  SIOptimizeExecMasking Impl;
+
+  if (!Impl.run(MF))
+    return PreservedAnalyses::all();
+
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
                       "SI optimize exec mask operations", false, false)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
+INITIALIZE_PASS_END(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
                     "SI optimize exec mask operations", false, false)
 
-char SIOptimizeExecMasking::ID = 0;
+char SIOptimizeExecMaskingLegacy::ID = 0;
 
-char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
+char &llvm::SIOptimizeExecMaskingLegacyID = SIOptimizeExecMaskingLegacy::ID;
 
 /// If \p MI is a copy from exec, return the register copied to.
 Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const {
@@ -786,10 +805,14 @@ bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() {
   return Changed;
 }
 
-bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
+bool SIOptimizeExecMaskingLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
+  return SIOptimizeExecMasking().run(MF);
+}
+
+bool SIOptimizeExecMasking::run(MachineFunction &MF) {
   this->MF = &MF;
   ST = &MF.getSubtarget<GCNSubtarget>();
   TRI = ST->getRegisterInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h
new file mode 100644
index 00000000000000..f170a4733279b0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h
@@ -0,0 +1,23 @@
+//===- SIOptimizeExecMasking.h ----------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPass
+    : public PassInfoMixin<SIOptimizeExecMaskingPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
index 8fb3e9ea3609b5..acc9bf78a34011 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
@@ -2,6 +2,8 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs  %s -o - | FileCheck %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs  %s -o - | FileCheck %s
 
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-optimize-exec-masking -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking -verify-machineinstrs  %s -o - | FileCheck %s
 ---
 name: lower_term_opcodes
 tracksRegLiveness: false
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
index ada5cfd5668eef..f2534a93da3025 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -passes=si-optimize-exec-masking -o - %s | FileCheck %s
 
 # Make sure we can still optimize writes to exec when there are
 # additional terminators after the exec write. This can happen with

@optimisan optimisan merged commit 96c4f97 into llvm:main Jan 20, 2025
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants