Skip to content

[AMDGPU][NPM] Support -regalloc-npm options #129035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include "llvm/CodeGen/MachineLICM.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocFast.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
Expand Down Expand Up @@ -189,6 +190,24 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use for WWM registers"));

// Hidden option -sgpr-regalloc-npm: selects the register allocator used for
// the SGPR allocation phase of the new pass manager pipeline. Defaults to
// RegAllocType::Default and is read by
// AMDGPUCodeGenPassBuilder::addRegAlloc.
static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocTypeNPM(
"sgpr-regalloc-npm", cl::Hidden,
cl::desc("Register allocator to use for SGPRs in new pass "
"manager"),
cl::init(RegAllocType::Default));

// Hidden option -vgpr-regalloc-npm: selects the register allocator used for
// the VGPR allocation phase of the new pass manager pipeline. Defaults to
// RegAllocType::Default and is read by
// AMDGPUCodeGenPassBuilder::addRegAlloc.
static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocTypeNPM(
"vgpr-regalloc-npm", cl::Hidden,
cl::desc("Register allocator to use for VGPRs in new pass "
"manager"),
cl::init(RegAllocType::Default));

// Hidden option -wwm-regalloc-npm: selects the register allocator used for
// the WWM register allocation phase of the new pass manager pipeline.
// Defaults to RegAllocType::Default and is read by
// AMDGPUCodeGenPassBuilder::addRegAlloc.
static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocTypeNPM(
"wwm-regalloc-npm", cl::Hidden,
cl::desc("Register allocator to use for WWM registers in "
"new pass manager"),
cl::init(RegAllocType::Default));

static void initializeDefaultSGPRRegisterAllocatorOnce() {
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();

Expand Down Expand Up @@ -2141,6 +2160,113 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}

// Error text returned by addRegAssignmentOptimized when Opt.RegAlloc is
// anything other than RegAllocType::Unset; it redirects the user to the
// per-phase allocator options.
static const char NPMRegAllocOptNotSupportedMessage[] =
    "-regalloc-npm not supported with amdgcn. "
    "Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm";

/// Build the options object used to construct \p RegAllocPassT for \p Phase.
///
/// Every phase supplies a register filter and the filter's name ("sgpr",
/// "wwm" or "vgpr"). RegAllocFastPass additionally receives a ClearVirtRegs
/// flag, which is true only for the VGPR phase.
template <typename RegAllocPassT>
typename RegAllocPassT::Options
AMDGPUCodeGenPassBuilder::getRAOptionsForPhase(RegAllocPhase Phase) const {
  using OptionsT = typename RegAllocPassT::Options;

  // RegAllocFastPass::Options is built from three values; the options of any
  // other allocator are built from the filter and its name only, so the flag
  // is dropped for them.
  auto MakeOptions = [](auto FilterFunc, const char *Name,
                        [[maybe_unused]] bool ClearVirtRegs) -> OptionsT {
    if constexpr (std::is_same_v<RegAllocPassT, RegAllocFastPass>)
      return OptionsT{FilterFunc, Name, ClearVirtRegs};
    else
      return OptionsT{FilterFunc, Name};
  };

  switch (Phase) {
  case RegAllocPhase::SGPR:
    return MakeOptions(onlyAllocateSGPRs, "sgpr", /*ClearVirtRegs=*/false);
  case RegAllocPhase::WWM:
    return MakeOptions(onlyAllocateWWMRegs, "wwm", /*ClearVirtRegs=*/false);
  case RegAllocPhase::VGPR:
    return MakeOptions(onlyAllocateVGPRs, "vgpr", /*ClearVirtRegs=*/true);
  }

  llvm_unreachable("invalid phase value");
}

template <typename RegAllocPassT>
void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass,
RegAllocPhase Phase) const {
RegAllocType RAType;
// Read the appropriate phase's regalloc type.
switch (Phase) {
case RegAllocPhase::SGPR:
RAType = SGPRRegAllocTypeNPM;
break;
case RegAllocPhase::WWM:
RAType = WWMRegAllocTypeNPM;
break;
case RegAllocPhase::VGPR:
RAType = VGPRRegAllocTypeNPM;
break;
}

// Construct the pass with the appropriate options.
switch (RAType) {
case RegAllocType::Greedy:
addPass(RAGreedyPass(getRAOptionsForPhase<RAGreedyPass>(Phase)));
return;
case RegAllocType::Fast:
addPass(RegAllocFastPass(getRAOptionsForPhase<RegAllocFastPass>(Phase)));
return;
case RegAllocType::Unset:
case RegAllocType::Default:
addPass(RegAllocPassT(getRAOptionsForPhase<RegAllocPassT>(Phase)));
return;
default:
report_fatal_error("Unsupported regalloc type for AMDGPU", false);
}
}

/// Add the optimized register-assignment pipeline to \p addPass.
///
/// Allocation runs as three separate phases, in this order: SGPRs, WWM
/// registers, then per-thread VGPRs. Each phase uses RAGreedyPass unless the
/// matching -{sgpr,wwm,vgpr}-regalloc-npm option selects another allocator
/// (see addRegAlloc).
///
/// \returns an error carrying NPMRegAllocOptNotSupportedMessage if
/// Opt.RegAlloc is anything other than RegAllocType::Unset, otherwise
/// Error::success().
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
AddMachinePass &addPass) const {
// A single allocator choice for the whole pipeline is rejected here; the
// error message points the user at the per-phase options instead.
if (Opt.RegAlloc != RegAllocType::Unset)
return make_error<StringError>(NPMRegAllocOptNotSupportedMessage,
inconvertibleErrorCode());

addPass(GCNPreRALongBranchRegPass());

// Phase 1: allocate SGPRs.
addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::SGPR);

// Commit allocated register changes. This is mostly necessary because too
// many things rely on the use lists of the physical registers, such as the
// verifier. This is only necessary with allocators which use LiveIntervals,
// since FastRegAlloc does the replacements itself.
// TODO: addPass(VirtRegRewriterPass(false));

// At this point, the sgpr-regalloc has been done and it is good to have the
// stack slot coloring to try to optimize the SGPR spill stack indices before
// attempting the custom SGPR spill lowering.
addPass(StackSlotColoringPass());

// Equivalent of PEI for SGPRs.
addPass(SILowerSGPRSpillsPass());

// To allocate WWM registers used in whole quad mode operations (for shaders).
addPass(SIPreAllocateWWMRegsPass());

// Phase 2: allocate the other WWM register operands.
addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::WWM);
addPass(SILowerWWMCopiesPass());
// TODO: addPass(VirtRegRewriterPass(false));
// TODO: addPass(AMDGPUReserveWWMRegsPass());

// Phase 3: allocate per-thread VGPRs.
addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::VGPR);

// TODO: addPreRewrite();
addPass(VirtRegRewriterPass(false));

// TODO: addPass(AMDGPUMarkLastScratchLoadPass());
return Error::success();
}

void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
addPass(SIFixVGPRCopiesPass());
if (TM.getOptLevel() > CodeGenOptLevel::None)
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@

#include "GCNSubtarget.h"
#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
#include "llvm/CodeGen/RegAllocCommon.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include <optional>
#include <utility>

Expand Down Expand Up @@ -179,6 +181,7 @@ class AMDGPUCodeGenPassBuilder
Error addInstSelector(AddMachinePass &) const;
void addPreRewrite(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
Error addRegAssignmentOptimized(AddMachinePass &) const;
void addPostRegAlloc(AddMachinePass &) const;
void addPreEmitPass(AddMachinePass &) const;

Expand All @@ -189,6 +192,23 @@ class AMDGPUCodeGenPassBuilder
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
void addEarlyCSEOrGVNPass(AddIRPass &) const;
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;

private:
/// The separate register allocation phases of the AMDGPU pipeline. Each phase
/// has its own -{phase}-regalloc-npm option. The enumerator order here is not
/// the order in which the phases are added to the pipeline.
enum class RegAllocPhase { SGPR, VGPR, WWM };

/// Return the options used to construct \p RegAllocPassT for the given
/// register allocation phase.
///
/// \tparam RegAllocPassT The register allocation pass type whose nested
/// Options type is returned.
template <typename RegAllocPassT>
typename RegAllocPassT::Options getRAOptionsForPhase(RegAllocPhase) const;

/// Add register allocation pass to the pass manager.
/// This checks for the regalloc type given through
/// -{phase}-regalloc-npm={type} cl option. If the option is not specified, it
/// uses the preferred regalloc pass type. The greedy and fast types select
/// RAGreedyPass and RegAllocFastPass respectively; any other explicitly
/// requested type is reported as a fatal error.
///
/// \tparam PreferredRegAllocPassT The fallback reg alloc pass type to use if
/// cl::opt is unspecified.
/// \param Phase The phase of register allocation to add.
template <typename PreferredRegAllocPassT>
void addRegAlloc(AddMachinePass &, RegAllocPhase Phase) const;
};

} // end namespace llvm
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER

# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes %s | FileCheck %s --check-prefix=NPM-PASS


# PASS: regallocfast<filter=sgpr>
# PASS: regallocfast<filter=wwm>
# PASS: regallocfast<filter=vgpr>
# BAD-FILTER: invalid regallocfast register filter 'bad-filter'

# NPM-PASS: greedy<sgpr>
# NPM-PASS: regallocfast<filter=wwm;no-clear-vregs>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the no-clear-vregs option exposed on the command line? It was originally an internal flag to control vreg clearing for targets that require multiple regalloc pipelines.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How else would you test this part of the pipeline standalone?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember seeing a command line option for doing it in the legacy path. So it's something new we're introducing in the NPM?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only way you can do it now is -start-before / -stop-after. This would allow you to have the explicit passes in the run line.

# NPM-PASS: regallocfast<filter=vgpr>
---
name: f
...