-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM #125351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
c31b0c4
to
764026d
Compare
7ef9eb5
to
1be5934
Compare
@@ -1,4 +1,5 @@ | |||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s | |||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove this line for now if you want to immediately merge it. The other test is sufficient to validate the NPM flow.
0f7f666
to
c20de57
Compare
3a4cc45
to
c6fa1b8
Compare
c20de57
to
74528e6
Compare
c6fa1b8
to
dd272d2
Compare
79d71b8
to
90931d8
Compare
e5e5662
to
97ea769
Compare
c2f7199
to
a980669
Compare
9b4d0f5
to
e0ecd0f
Compare
a980669
to
7d6c02f
Compare
@llvm/pr-subscribers-backend-amdgpu — Author: Akshat Oke (optimisan). Changes: Full diff: https://github.com/llvm/llvm-project/pull/125351.diff (6 Files Affected):
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 42392e22643b2..80786c6fefd3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
-void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
+void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
extern char &SIOptimizeExecMaskingPreRAID;
void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index fd1341e8c91b2..a4504d78c7250 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
+MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
@@ -130,7 +131,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7c9377e61230b..dbd126d18785a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -51,6 +51,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h"
+#include "SIOptimizeExecMaskingPreRA.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPreAllocateWWMRegs.h"
@@ -501,7 +502,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIFoldOperandsLegacyPass(*PR);
initializeSIPeepholeSDWALegacyPass(*PR);
initializeSIShrinkInstructionsLegacyPass(*PR);
- initializeSIOptimizeExecMaskingPreRAPass(*PR);
+ initializeSIOptimizeExecMaskingPreRALegacyPass(*PR);
initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR);
initializeSILoadStoreOptimizerLegacyPass(*PR);
initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 31f65d82a4d2b..b2228574378f1 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -12,6 +12,7 @@
///
//===----------------------------------------------------------------------===//
+#include "SIOptimizeExecMaskingPreRA.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -25,7 +26,7 @@ using namespace llvm;
namespace {
-class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
+class SIOptimizeExecMaskingPreRA {
private:
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
@@ -42,11 +43,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
bool optimizeVcndVcmpPair(MachineBasicBlock &MBB);
bool optimizeElseBranch(MachineBasicBlock &MBB);
+public:
+ SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {}
+ bool run(MachineFunction &MF);
+};
+
+class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass {
public:
static char ID;
- SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {
- initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());
+ SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) {
+ initializeSIOptimizeExecMaskingPreRALegacyPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -64,18 +72,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass {
} // End anonymous namespace.
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
"SI optimize exec mask operations pre-RA", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
-INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE,
+INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE,
"SI optimize exec mask operations pre-RA", false, false)
-char SIOptimizeExecMaskingPreRA::ID = 0;
+char SIOptimizeExecMaskingPreRALegacy::ID = 0;
-char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID;
+char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID;
FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() {
- return new SIOptimizeExecMaskingPreRA();
+ return new SIOptimizeExecMaskingPreRALegacy();
}
// See if there is a def between \p AndIdx and \p SelIdx that needs to live
@@ -340,15 +348,28 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
return true;
}
-bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF);
+ SIOptimizeExecMaskingPreRA(&LIS).run(MF);
+ return PreservedAnalyses::all();
+}
+
+bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction(
+ MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+ return SIOptimizeExecMaskingPreRA(LIS).run(MF);
+}
+
+bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
- LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
const bool Wave32 = ST.isWave32();
AndOpc = Wave32 ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
new file mode 100644
index 0000000000000..cf9c6ce5f0083
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h
@@ -0,0 +1,23 @@
+//===- SIOptimizeExecMaskingPreRA.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class SIOptimizeExecMaskingPreRAPass
+ : public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
index 7aea97a3053c7..2eb1f5d559651 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
+# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s
# FIXME: This is a miscompile, and the s_or_b64s need to be preserved.
|
No description provided.