Skip to content

Commit aa1fe57

Browse files
authored
[RegAlloc][NewPM] Plug Greedy RA in codegen pipeline (#120557)
Use `-passes="regallocgreedy<[all|sgpr|wwm|vgpr]>"` to insert the greedy RA with a filter, and `-regalloc-npm=<type>` to control which RA to use in the existing pipeline.
1 parent f244b8e commit aa1fe57

14 files changed

+88
-21
lines changed

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1063,7 +1063,9 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
10631063
///
10641064
/// A target that uses the standard regalloc pass order for fast or optimized
10651065
/// allocation may still override this for per-target regalloc
1066-
/// selection. But -regalloc=... always takes precedence.
1066+
/// selection. But -regalloc-npm=... always takes precedence.
1067+
/// If a target does not want to allow users to set -regalloc-npm=... at all,
1068+
/// check if Opt.RegAlloc == RegAllocType::Unset.
10671069
template <typename Derived, typename TargetMachineT>
10681070
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
10691071
AddMachinePass &addPass, bool Optimized) const {
@@ -1076,10 +1078,29 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
10761078
/// Find and instantiate the register allocation pass requested by this target
10771079
/// at the current optimization level. Different register allocators are
10781080
/// defined as separate passes because they may require different analysis.
1081+
///
1082+
/// This helper ensures that the -regalloc-npm= option is always available,
1083+
/// even for targets that override the default allocator.
10791084
template <typename Derived, typename TargetMachineT>
10801085
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
10811086
AddMachinePass &addPass, bool Optimized) const {
1082-
// TODO: Parse Opt.RegAlloc to add register allocator.
1087+
// Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
1088+
if (Opt.RegAlloc > RegAllocType::Default) {
1089+
switch (Opt.RegAlloc) {
1090+
case RegAllocType::Fast:
1091+
addPass(RegAllocFastPass());
1092+
break;
1093+
case RegAllocType::Greedy:
1094+
addPass(RAGreedyPass());
1095+
break;
1096+
default:
1097+
report_fatal_error("register allocator not supported yet", false);
1098+
}
1099+
return;
1100+
}
1101+
// -regalloc-npm=default or unspecified, so pick based on the optimization level
1102+
// or ask the target for the regalloc pass.
1103+
derived().addTargetRegisterAllocator(addPass, Optimized);
10831104
}
10841105

10851106
template <typename Derived, typename TargetMachineT>
@@ -1150,20 +1171,22 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
11501171
// PreRA instruction scheduling.
11511172
addPass(MachineSchedulerPass(&TM));
11521173

1153-
if (derived().addRegAssignmentOptimized(addPass)) {
1154-
// Allow targets to expand pseudo instructions depending on the choice of
1155-
// registers before MachineCopyPropagation.
1156-
derived().addPostRewrite(addPass);
1174+
if (auto E = derived().addRegAssignmentOptimized(addPass)) {
1175+
// addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
1176+
return;
1177+
}
1178+
// Allow targets to expand pseudo instructions depending on the choice of
1179+
// registers before MachineCopyPropagation.
1180+
derived().addPostRewrite(addPass);
11571181

1158-
// Copy propagate to forward register uses and try to eliminate COPYs that
1159-
// were not coalesced.
1160-
addPass(MachineCopyPropagationPass());
1182+
// Copy propagate to forward register uses and try to eliminate COPYs that
1183+
// were not coalesced.
1184+
addPass(MachineCopyPropagationPass());
11611185

1162-
// Run post-ra machine LICM to hoist reloads / remats.
1163-
//
1164-
// FIXME: can this move into MachineLateOptimization?
1165-
addPass(MachineLICMPass());
1166-
}
1186+
// Run post-ra machine LICM to hoist reloads / remats.
1187+
//
1188+
// FIXME: can this move into MachineLateOptimization?
1189+
addPass(MachineLICMPass());
11671190
}
11681191

11691192
//===---------------------------------------------------------------------===//

llvm/include/llvm/Passes/MachinePassRegistry.def

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,13 +196,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
196196
},
197197
"filter=reg-filter;no-clear-vregs")
198198

199+
// 'all' is the default filter.
199200
MACHINE_FUNCTION_PASS_WITH_PARAMS(
200201
"greedy", "RAGreedyPass",
201202
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
202203
[PB = this](StringRef Params) {
203-
// TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
204-
(void)PB;
205-
return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
204+
return parseRegAllocGreedyFilterFunc(*PB, Params);
206205
}, "reg-filter"
207206
)
208207
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS

llvm/include/llvm/Target/CGPassBuilderOption.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,29 @@
1414
#ifndef LLVM_TARGET_CGPASSBUILDEROPTION_H
1515
#define LLVM_TARGET_CGPASSBUILDEROPTION_H
1616

17+
#include "llvm/Support/CommandLine.h"
1718
#include "llvm/Target/TargetOptions.h"
1819
#include <optional>
1920

2021
namespace llvm {
2122

2223
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
23-
enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
24+
enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
25+
26+
class RegAllocTypeParser : public cl::parser<RegAllocType> {
27+
public:
28+
RegAllocTypeParser(cl::Option &O) : cl::parser<RegAllocType>(O) {}
29+
void initialize() {
30+
cl::parser<RegAllocType>::initialize();
31+
addLiteralOption("default", RegAllocType::Default,
32+
"Default register allocator");
33+
addLiteralOption("pbqp", RegAllocType::PBQP, "PBQP register allocator");
34+
addLiteralOption("fast", RegAllocType::Fast, "Fast register allocator");
35+
addLiteralOption("basic", RegAllocType::Basic, "Basic register allocator");
36+
addLiteralOption("greedy", RegAllocType::Greedy,
37+
"Greedy register allocator");
38+
}
39+
};
2440

2541
// Not one-on-one but mostly corresponding to commandline options in
2642
// TargetPassConfig.cpp.
@@ -52,7 +68,7 @@ struct CGPassBuilderOption {
5268
bool RequiresCodeGenSCCOrder = false;
5369

5470
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
55-
StringRef RegAlloc = "default";
71+
RegAllocType RegAlloc = RegAllocType::Unset;
5672
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
5773
std::string FSProfileFile;
5874
std::string FSRemappingFile;

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,20 @@ parseBoundsCheckingOptions(StringRef Params) {
14151415
return Options;
14161416
}
14171417

1418+
Expected<RAGreedyPass::Options>
1419+
parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
1420+
if (Params.empty() || Params == "all")
1421+
return RAGreedyPass::Options();
1422+
1423+
std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
1424+
if (Filter)
1425+
return RAGreedyPass::Options{*Filter, Params};
1426+
1427+
return make_error<StringError>(
1428+
formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
1429+
inconvertibleErrorCode());
1430+
}
1431+
14181432
} // namespace
14191433

14201434
/// Tests whether a pass name starts with a valid prefix for a default pipeline

llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
22
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=greedy -o - %s | FileCheck %s
34

45
--- |
56
define void @inst_stores_to_dead_spill_implicit_def_impdef() {

llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
22
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=arm64-apple-ios -passes=greedy -o - %s | FileCheck %s
34

45
---
56
name: widget

llvm/test/CodeGen/AArch64/pr51516.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
2+
# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=greedy -verify-machineinstrs -o - %s | FileCheck %s
23

34
# Check that we spill %31 and do not rematerialize it since the use operand
45
# of ADDXri is killed by the STRXui in this block.

llvm/test/CodeGen/AArch64/spill-fold.mir

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
22
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
3+
# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=greedy -o - %s | FileCheck %s
4+
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=greedy -o - %s | FileCheck %s
35
--- |
46
define i64 @test_subreg_spill_fold() { ret i64 0 }
57
define i64 @test_subreg_spill_fold2() { ret i64 0 }

llvm/test/CodeGen/MIR/Generic/runPass.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
33
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
44
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
5+
# RUN: llc -passes=greedy -o - %s | FileCheck %s
56

67
# Check that passes are initialized correctly, so that it's possible to
78
# use -run-pass.

llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
2+
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=greedy
23
#PR34502. Check HoistSpill works properly after the live range of spilled
34
#virtual register is cleared.
45
--- |

llvm/test/CodeGen/Thumb/high-reg-clobber.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
44
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
55
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
6+
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=greedy %s -o - | FileCheck %s
67

78
...
89
---

llvm/test/CodeGen/X86/limit-split-cost.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# REQUIRES: asserts
22
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
3+
# RUN: llc -mtriple=x86_64-- -passes=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
34
# Check no global region split is needed because the live range to split is trivially rematerializable.
45
# CHECK-NOT: Compact region bundles
56
--- |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# REQUIRES: x86-registered-target
2+
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=fast -print-pipeline-passes %s 2>&1 | FileCheck %s
3+
# RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-new-pm -O3 -regalloc-npm=greedy -print-pipeline-passes %s 2>&1 | FileCheck %s --check-prefix=CHECK-GREEDY
4+
5+
# CHECK: regallocfast
6+
# CHECK-GREEDY: greedy<all>

llvm/tools/llc/NewPMDriver.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@
4848

4949
using namespace llvm;
5050

51-
static cl::opt<std::string>
51+
static cl::opt<RegAllocType, false, RegAllocTypeParser>
5252
RegAlloc("regalloc-npm",
5353
cl::desc("Register allocator to use for new pass manager"),
54-
cl::Hidden, cl::init("default"));
54+
cl::Hidden, cl::init(RegAllocType::Unset));
5555

5656
static cl::opt<bool>
5757
DebugPM("debug-pass-manager", cl::Hidden,

0 commit comments

Comments
 (0)