Skip to content

Commit c518b0b

Browse files
committed
[AMDGPU][NPM] Support -{phase}-regalloc-npm options
1 parent 302b9f0 commit c518b0b

File tree

2 files changed

+262
-0
lines changed

2 files changed

+262
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
#include "llvm/CodeGen/MachineLICM.h"
7878
#include "llvm/CodeGen/MachineScheduler.h"
7979
#include "llvm/CodeGen/Passes.h"
80+
#include "llvm/CodeGen/RegAllocFast.h"
8081
#include "llvm/CodeGen/RegAllocRegistry.h"
8182
#include "llvm/CodeGen/TargetPassConfig.h"
8283
#include "llvm/IR/IntrinsicsAMDGPU.h"
@@ -188,6 +189,24 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
188189
cl::init(&useDefaultRegisterAllocator),
189190
cl::desc("Register allocator to use for WWM registers"));
190191

192+
// Hidden new-pass-manager option selecting the allocator for the SGPR phase
// (-sgpr-regalloc-npm). Initial value: RegAllocType::Default.
static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocTypeNPM(
193+
"sgpr-regalloc-npm", cl::Hidden,
194+
cl::desc("Register allocator to use for SGPRs in new pass "
195+
"manager"),
196+
cl::init(RegAllocType::Default));
197+
198+
// Hidden new-pass-manager option selecting the allocator for the VGPR phase
// (-vgpr-regalloc-npm). Initial value: RegAllocType::Default.
static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocTypeNPM(
199+
"vgpr-regalloc-npm", cl::Hidden,
200+
cl::desc("Register allocator to use for VGPRs in new pass "
201+
"manager"),
202+
cl::init(RegAllocType::Default));
203+
204+
// Hidden new-pass-manager option selecting the allocator for the WWM phase
// (-wwm-regalloc-npm). Initial value: RegAllocType::Default.
static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocTypeNPM(
205+
"wwm-regalloc-npm", cl::Hidden,
206+
cl::desc("Register allocator to use for WWM registers in "
207+
"new pass manager"),
208+
cl::init(RegAllocType::Default));
209+
191210
static void initializeDefaultSGPRRegisterAllocatorOnce() {
192211
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
193212

@@ -2140,6 +2159,214 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
21402159
addPass(SIShrinkInstructionsPass());
21412160
}
21422161

2162+
// Error text returned by addRegAssignmentOptimized() when the generic
// register-allocator option (Opt.RegAlloc) is set to anything other than
// RegAllocType::Unset; it points the user at the per-phase options instead.
static const char NPMRegAllocOptNotSupportedMessage[] =
2163+
"-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
2164+
"-wwm-regalloc-npm, "
2165+
"and -vgpr-regalloc-npm";
2166+
2167+
// void AMDGPUCodeGenPassBuilder::addSGPRRegAlloc(AddMachinePass &addPass,
2168+
// RegAllocType RAType, RegAllocFilterFunc FilterFunc, bool Optimized) const {
2169+
// RegAllocType RAType = RegAllocTypeNPM;
2170+
// if (RAType == RegAllocType::Default) {
2171+
// RAType = Optimized ? RegAllocType::Greedy : RegAllocType::Fast;
2172+
// }
2173+
2174+
// if (RAType == RegAllocType::Greedy) {
2175+
// addPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}));
2176+
// return;
2177+
// }
2178+
2179+
// if (RAType == RegAllocType::Fast) {
2180+
// addPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}));
2181+
// return;
2182+
// }
2183+
// report_fatal_error("Unsupported SGPR regalloc type", false);
2184+
// }
2185+
2186+
// template<typename RegAllocPass>
2187+
// void AMDGPUCodeGenPassBuilder::addRegAllocOfType(AddMachinePass &addPass,
2188+
// RegAllocPass::Options Options) {
2189+
// addPass(RegAllocPass(Options));
2190+
// }
2191+
2192+
// this is the final method
2193+
// template<typename RegAllocPass>
2194+
// void AMDGPUCodeGenPassBuilder::addRegAllocOfType(AddMachinePass &addPass,
2195+
// RegAllocPhase Phase) {
2196+
// #define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \
2197+
// [&]() { \
2198+
// if constexpr (std::is_same_v<RegAllocPass, RegAllocFastPass>) { \
2199+
// return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \
2200+
// } else { \
2201+
// return typename RegAllocPass::Options{FilterFunc, Name}; \
2202+
// } \
2203+
// }()
2204+
2205+
// typename RegAllocPass::Options Options;
2206+
// RegAllocType RAType;
2207+
2208+
// switch (Phase) {
2209+
// case RegAllocPhase::SGPR:
2210+
// Options = RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
2211+
// RAType = SGPRRegAllocTypeNPM;
2212+
// break;
2213+
// case RegAllocPhase::WWM:
2214+
// Options = RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
2215+
// RAType = WWMRegAllocTypeNPM;
2216+
// break;
2217+
// case RegAllocPhase::VGPR:
2218+
// Options = RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
2219+
// RAType = VGPRRegAllocTypeNPM;
2220+
// break;
2221+
// };
2222+
2223+
// switch(RAType) {
2224+
// case RegAllocType::Greedy:
2225+
// addPass(RAGreedyPass(Options));
2226+
// return;
2227+
// case RegAllocType::Fast:
2228+
// addPass(RegAllocFastPass(Options));
2229+
// return;
2230+
// case RegAllocType::Unset:
2231+
// addPass(RegAllocPass(Options));
2232+
// }
2233+
// #undef RA_OPTIONS
2234+
// }
2235+
2236+
// template<typename RegAllocPass>
2237+
// void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass,
2238+
// RegAllocPhase Phase) {
2239+
// RegAllocType RAType;
2240+
// switch(Phase) {
2241+
// case RegAllocPhase::SGPR:
2242+
// RAType = SGPRRegAllocTypeNPM;
2243+
// break;
2244+
// case RegAllocPhase::WWM:
2245+
// RAType = WWMRegAllocTypeNPM;
2246+
// break;
2247+
// case RegAllocPhase::VGPR:
2248+
// RAType = VGPRRegAllocTypeNPM;
2249+
// break;
2250+
// }
2251+
// switch (RAType) {
2252+
// case RegAllocType::Greedy:
2253+
// addRegAllocOfType(addPass, Phase);
2254+
// }
2255+
// addRegAllocOfType<RegAllocPass>(addPass, Phase);
2256+
// }
2257+
2258+
template <typename RegAllocPassT>
2259+
typename RegAllocPassT::Options
2260+
AMDGPUCodeGenPassBuilder::getRAOptionsForPhase(RegAllocPhase Phase) const {
2261+
#define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \
2262+
[&]() { \
2263+
if constexpr (std::is_same_v<RegAllocPassT, RegAllocFastPass>) { \
2264+
return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \
2265+
} else { \
2266+
return typename RegAllocPassT::Options{FilterFunc, Name}; \
2267+
} \
2268+
}()
2269+
2270+
switch (Phase) {
2271+
case RegAllocPhase::SGPR:
2272+
return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
2273+
case RegAllocPhase::WWM:
2274+
return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
2275+
case RegAllocPhase::VGPR:
2276+
return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
2277+
}
2278+
// static_assert(std::is_same_v<PhaseT, SGPRPhase> ||
2279+
// std::is_same_v<PhaseT, WWMPhase> ||
2280+
// std::is_same_v<PhaseT, VGPRPhase>,
2281+
// "Unsupported phase type");
2282+
2283+
// if constexpr(std::is_same_v<PhaseT, SGPRPhase>) {
2284+
// return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false);
2285+
// } else if constexpr(std::is_same_v<PhaseT, WWMPhase>) {
2286+
// return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false);
2287+
// } else if constexpr(std::is_same_v<PhaseT, VGPRPhase>) {
2288+
// return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true);
2289+
// }
2290+
2291+
#undef RA_OPTIONS
2292+
}
2293+
2294+
template <typename RegAllocPassT>
2295+
void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass,
2296+
RegAllocPhase Phase) const {
2297+
RegAllocType RAType;
2298+
// Read the appropriate phase's regalloc type.
2299+
switch (Phase) {
2300+
case RegAllocPhase::SGPR:
2301+
RAType = SGPRRegAllocTypeNPM;
2302+
break;
2303+
case RegAllocPhase::WWM:
2304+
RAType = WWMRegAllocTypeNPM;
2305+
break;
2306+
case RegAllocPhase::VGPR:
2307+
RAType = VGPRRegAllocTypeNPM;
2308+
break;
2309+
}
2310+
2311+
// Construct the pass with the appropriate options.
2312+
switch (RAType) {
2313+
case RegAllocType::Greedy:
2314+
addPass(RAGreedyPass(getRAOptionsForPhase<RAGreedyPass>(Phase)));
2315+
return;
2316+
case RegAllocType::Fast:
2317+
addPass(RegAllocFastPass(getRAOptionsForPhase<RegAllocFastPass>(Phase)));
2318+
return;
2319+
case RegAllocType::Unset:
2320+
addPass(RegAllocPassT(getRAOptionsForPhase<RegAllocPassT>(Phase)));
2321+
return;
2322+
default:
2323+
report_fatal_error("Unsupported regalloc type", false);
2324+
}
2325+
}
2326+
2327+
/// Add the optimized register-assignment pipeline for the new pass manager.
///
/// Allocation runs in three phases (SGPR, then WWM, then VGPR), each added
/// through addRegAlloc with RAGreedyPass as the preferred allocator. SGPR spill
/// handling is placed between the SGPR and WWM phases, and a single
/// VirtRegRewriterPass is added after the VGPR phase. The per-phase rewrites
/// and a few other passes are still marked TODO below.
///
/// \returns A StringError carrying NPMRegAllocOptNotSupportedMessage when
/// Opt.RegAlloc is not RegAllocType::Unset; Error::success() otherwise.
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
2328+
AddMachinePass &addPass) const {
2329+
// The generic allocator option is rejected; the per-phase options are the
// only supported way to choose an allocator here.
if (Opt.RegAlloc != RegAllocType::Unset)
2330+
return make_error<StringError>(NPMRegAllocOptNotSupportedMessage,
2331+
inconvertibleErrorCode());
2332+
2333+
addPass(GCNPreRALongBranchRegPass());
2334+
2335+
// Allocate SGPRs.
addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::SGPR);
2336+
2337+
// Commit allocated register changes. This is mostly necessary because too
2338+
// many things rely on the use lists of the physical registers, such as the
2339+
// verifier. This is only necessary with allocators which use LiveIntervals,
2340+
// since FastRegAlloc does the replacements itself.
2341+
// TODO: addPass(VirtRegRewriterPass(false));
2342+
2343+
// At this point, the sgpr-regalloc has been done and it is good to have the
2344+
// stack slot coloring to try to optimize the SGPR spill stack indices before
2345+
// attempting the custom SGPR spill lowering.
2346+
addPass(StackSlotColoringPass());
2347+
2348+
// Equivalent of PEI for SGPRs.
2349+
addPass(SILowerSGPRSpillsPass());
2350+
2351+
// Allocate WWM registers used in whole quad mode operations (for shaders).
2352+
addPass(SIPreAllocateWWMRegsPass());
2353+
2354+
// For allocating other wwm register operands.
2355+
addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::WWM);
2356+
addPass(SILowerWWMCopiesPass());
2357+
// TODO: addPass(VirtRegRewriterPass(false));
2358+
// TODO: addPass(AMDGPUReserveWWMRegsPass());
2359+
2360+
// For allocating per-thread VGPRs.
2361+
addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::VGPR);
2362+
2363+
// TODO: addPreRewrite();
2364+
addPass(VirtRegRewriterPass(false));
2365+
2366+
// TODO: addPass(AMDGPUMarkLastScratchLoadPass());
2367+
return Error::success();
2368+
}
2369+
21432370
void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
21442371
addPass(SIFixVGPRCopiesPass());
21452372
if (TM.getOptLevel() > CodeGenOptLevel::None)

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@
1616

1717
#include "GCNSubtarget.h"
1818
#include "llvm/CodeGen/CodeGenTargetMachineImpl.h"
19+
#include "llvm/CodeGen/RegAllocCommon.h"
1920
#include "llvm/CodeGen/TargetPassConfig.h"
2021
#include "llvm/MC/MCStreamer.h"
2122
#include "llvm/Passes/CodeGenPassBuilder.h"
23+
#include "llvm/Target/CGPassBuilderOption.h"
2224
#include <optional>
2325
#include <utility>
2426

@@ -179,6 +181,7 @@ class AMDGPUCodeGenPassBuilder
179181
Error addInstSelector(AddMachinePass &) const;
180182
void addPreRewrite(AddMachinePass &) const;
181183
void addMachineSSAOptimization(AddMachinePass &) const;
184+
Error addRegAssignmentOptimized(AddMachinePass &) const;
182185
void addPostRegAlloc(AddMachinePass &) const;
183186
void addPreEmitPass(AddMachinePass &) const;
184187

@@ -189,6 +192,38 @@ class AMDGPUCodeGenPassBuilder
189192
CodeGenOptLevel Level = CodeGenOptLevel::Default) const;
190193
void addEarlyCSEOrGVNPass(AddIRPass &) const;
191194
void addStraightLineScalarOptimizationPasses(AddIRPass &) const;
195+
196+
private:
197+
// /// Dummy structs to represent different phases of register allocation.
198+
// struct SGPRPhase{};
199+
// struct VGPRPhase{};
200+
// struct WWMPhase{};
201+
/// Register allocation phases. Each phase is allocated by its own pass
/// instance and has its own -{phase}-regalloc-npm option.
enum class RegAllocPhase { SGPR, VGPR, WWM };
202+
203+
/// Return the options object of \p RegAllocPassT for the given phase,
/// populated with that phase's register filter and filter name.
///
/// \tparam RegAllocPassT Allocator pass whose nested Options type is returned.
template <typename RegAllocPassT>
204+
typename RegAllocPassT::Options getRAOptionsForPhase(RegAllocPhase) const;
205+
206+
/// \brief Add register allocation pass to the pass manager.
206+
/// This checks for the regalloc type given through
207+
/// -{phase}-regalloc-npm={type} cl option. If the option is not specified, it
208+
/// uses the preferred regalloc pass type.
209+
/// An explicit greedy or fast request adds RAGreedyPass or RegAllocFastPass
/// respectively, regardless of the preferred type.
///
210+
/// \tparam PreferredRegAllocPassT The fallback reg alloc pass type to use if
211+
/// cl::opt is unspecified.
212+
/// \param Phase The phase of register allocation to add; it selects both the
/// option that is read and the phase-specific allocator options.
213+
template <typename PreferredRegAllocPassT>
214+
void addRegAlloc(AddMachinePass &, RegAllocPhase Phase) const;
216+
217+
// instantiate the template for each phase
218+
// Disabled declaration, kept for reference. It was meant to add the register
219+
// allocation pass for a given filter func and type (greedy/fast).
220+
//   Type: if RegAllocType::Default, add according to the optimization
221+
//   level.
222+
// void addRegAllocPass(AddMachinePass &, RegAllocType Type,
223+
// RegAllocFilterFunc FilterFunc) const;
224+
// Per-phase register allocation helpers taking an "Optimized" flag.
// NOTE(review): the only definition of addSGPRRegAlloc in the accompanying
// .cpp change is commented out, and no addWWMRegAlloc / addVGPRRegAlloc
// definitions appear there -- confirm these are defined elsewhere or drop
// the declarations.
void addSGPRRegAlloc(AddMachinePass &, bool Optimized) const;
225+
void addWWMRegAlloc(AddMachinePass &, bool Optimized) const;
226+
void addVGPRRegAlloc(AddMachinePass &, bool Optimized) const;
192227
};
193228

194229
} // end namespace llvm

0 commit comments

Comments
 (0)