-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][NewPM] Port SIOptimizeExecMasking to NPM #123572
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) ChangesFull diff: https://github.com/llvm/llvm-project/pull/123572.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 89356df39724a4..5d9a830f041a74 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -216,8 +216,8 @@ extern char &SIPreEmitPeepholeID;
void initializeSILateBranchLoweringPass(PassRegistry &);
extern char &SILateBranchLoweringPassID;
-void initializeSIOptimizeExecMaskingPass(PassRegistry &);
-extern char &SIOptimizeExecMaskingID;
+void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
+extern char &SIOptimizeExecMaskingLegacyID;
void initializeSIPreAllocateWWMRegsLegacyPass(PassRegistry &);
extern char &SIPreAllocateWWMRegsLegacyID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fbcf83e2fdd60b..09a39d23d801b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -105,6 +105,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 98268b848f5ce6..53ec80b8f72049 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -42,6 +42,7 @@
#include "SILowerSGPRSpills.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
+#include "SIOptimizeExecMasking.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
@@ -528,7 +529,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIPreEmitPeepholePass(*PR);
initializeSILateBranchLoweringPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
- initializeSIOptimizeExecMaskingPass(*PR);
+ initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesPass(*PR);
initializeSIPostRABundlerPass(*PR);
@@ -1634,7 +1635,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
void GCNPassConfig::addPostRegAlloc() {
addPass(&SIFixVGPRCopiesID);
if (getOptLevel() > CodeGenOptLevel::None)
- addPass(&SIOptimizeExecMaskingID);
+ addPass(&SIOptimizeExecMaskingLegacyID);
TargetPassConfig::addPostRegAlloc();
}
@@ -2105,6 +2106,13 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}
+void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
+ // addPass(SIFixVGPRCopiesID);
+ if (TM.getOptLevel() > CodeGenOptLevel::None)
+ addPass(SIOptimizeExecMaskingPass());
+ Base::addPostRegAlloc(addPass);
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..24b4da3a68f67e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,7 @@ class AMDGPUCodeGenPassBuilder
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
+ void addPostRegAlloc(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
index 93b70fa4ba974c..3fb8d5b560496b 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "SIOptimizeExecMasking.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -23,7 +24,7 @@ using namespace llvm;
namespace {
-class SIOptimizeExecMasking : public MachineFunctionPass {
+class SIOptimizeExecMasking {
MachineFunction *MF = nullptr;
const GCNSubtarget *ST = nullptr;
const SIRegisterInfo *TRI = nullptr;
@@ -61,11 +62,16 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
void tryRecordOrSaveexecXorSequence(MachineInstr &MI);
bool optimizeOrSaveexecXorSequences();
+public:
+ bool run(MachineFunction &MF);
+};
+
+class SIOptimizeExecMaskingLegacy : public MachineFunctionPass {
public:
static char ID;
- SIOptimizeExecMasking() : MachineFunctionPass(ID) {
- initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry());
+ SIOptimizeExecMaskingLegacy() : MachineFunctionPass(ID) {
+ initializeSIOptimizeExecMaskingLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -82,15 +88,28 @@ class SIOptimizeExecMasking : public MachineFunctionPass {
} // End anonymous namespace.
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
+PreservedAnalyses
+SIOptimizeExecMaskingPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &) {
+ SIOptimizeExecMasking Impl;
+
+ if (!Impl.run(MF))
+ return PreservedAnalyses::all();
+
+ auto PA = getMachineFunctionPassPreservedAnalyses();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
+
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
"SI optimize exec mask operations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
+INITIALIZE_PASS_END(SIOptimizeExecMaskingLegacy, DEBUG_TYPE,
"SI optimize exec mask operations", false, false)
-char SIOptimizeExecMasking::ID = 0;
+char SIOptimizeExecMaskingLegacy::ID = 0;
-char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
+char &llvm::SIOptimizeExecMaskingLegacyID = SIOptimizeExecMaskingLegacy::ID;
/// If \p MI is a copy from exec, return the register copied to.
Register SIOptimizeExecMasking::isCopyFromExec(const MachineInstr &MI) const {
@@ -786,10 +805,14 @@ bool SIOptimizeExecMasking::optimizeOrSaveexecXorSequences() {
return Changed;
}
-bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
+bool SIOptimizeExecMaskingLegacy::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ return SIOptimizeExecMasking().run(MF);
+}
+
+bool SIOptimizeExecMasking::run(MachineFunction &MF) {
this->MF = &MF;
ST = &MF.getSubtarget<GCNSubtarget>();
TRI = ST->getRegisterInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h
new file mode 100644
index 00000000000000..f170a4733279b0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.h
@@ -0,0 +1,23 @@
+//===- SIOptimizeExecMasking.h ----------------------------------*- C++- *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPass
+ : public PassInfoMixin<SIOptimizeExecMaskingPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKING_H
diff --git a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
index 8fb3e9ea3609b5..acc9bf78a34011 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
+++ b/llvm/test/CodeGen/AMDGPU/lower-term-opcodes.mir
@@ -2,6 +2,8 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking -verify-machineinstrs %s -o - | FileCheck %s
---
name: lower_term_opcodes
tracksRegLiveness: false
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
index ada5cfd5668eef..f2534a93da3025 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-copies-extra-insts-after-copy.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass=si-optimize-exec-masking -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -passes=si-optimize-exec-masking -o - %s | FileCheck %s
# Make sure we can still optimize writes to exec when there are
# additional terminators after the exec write. This can happen with
|
arsenm
approved these changes
Jan 20, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.