|
77 | 77 | #include "llvm/CodeGen/MachineLICM.h"
|
78 | 78 | #include "llvm/CodeGen/MachineScheduler.h"
|
79 | 79 | #include "llvm/CodeGen/Passes.h"
|
| 80 | +#include "llvm/CodeGen/RegAllocFast.h" |
80 | 81 | #include "llvm/CodeGen/RegAllocRegistry.h"
|
81 | 82 | #include "llvm/CodeGen/TargetPassConfig.h"
|
82 | 83 | #include "llvm/IR/IntrinsicsAMDGPU.h"
|
@@ -188,6 +189,24 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
|
188 | 189 | cl::init(&useDefaultRegisterAllocator),
|
189 | 190 | cl::desc("Register allocator to use for WWM registers"));
|
190 | 191 |
|
| 192 | +static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocTypeNPM( |
| 193 | + "sgpr-regalloc-npm", cl::Hidden, |
| 194 | + cl::desc("Register allocator to use for SGPRs in new pass " |
| 195 | + "manager"), |
| 196 | + cl::init(RegAllocType::Default)); |
| 197 | + |
| 198 | +static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocTypeNPM( |
| 199 | + "vgpr-regalloc-npm", cl::Hidden, |
| 200 | + cl::desc("Register allocator to use for VGPRs in new pass " |
| 201 | + "manager"), |
| 202 | + cl::init(RegAllocType::Default)); |
| 203 | + |
| 204 | +static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocTypeNPM( |
| 205 | + "wwm-regalloc-npm", cl::Hidden, |
| 206 | + cl::desc("Register allocator to use for WWM registers in " |
| 207 | + "new pass manager"), |
| 208 | + cl::init(RegAllocType::Default)); |
| 209 | + |
191 | 210 | static void initializeDefaultSGPRRegisterAllocatorOnce() {
|
192 | 211 | RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
|
193 | 212 |
|
@@ -2140,6 +2159,214 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
|
2140 | 2159 | addPass(SIShrinkInstructionsPass());
|
2141 | 2160 | }
|
2142 | 2161 |
|
// Error text returned when the generic -regalloc-npm option is set; it names
// the per-phase options to use instead.
static const char NPMRegAllocOptNotSupportedMessage[] =
    "-regalloc-npm not supported with amdgcn. "
    "Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm";
| 2166 | + |
| 2167 | +// void AMDGPUCodeGenPassBuilder::addSGPRRegAlloc(AddMachinePass &addPass, |
| 2168 | +// RegAllocType RAType, RegAllocFilterFunc FilterFunc, bool Optimized) const { |
| 2169 | +// RegAllocType RAType = RegAllocTypeNPM; |
| 2170 | +// if (RAType == RegAllocType::Default) { |
| 2171 | +// RAType = Optimized ? RegAllocType::Greedy : RegAllocType::Fast; |
| 2172 | +// } |
| 2173 | + |
| 2174 | +// if (RAType == RegAllocType::Greedy) { |
| 2175 | +// addPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"})); |
| 2176 | +// return; |
| 2177 | +// } |
| 2178 | + |
| 2179 | +// if (RAType == RegAllocType::Fast) { |
| 2180 | +// addPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false})); |
| 2181 | +// return; |
| 2182 | +// } |
| 2183 | +// report_fatal_error("Unsupported SGPR regalloc type", false); |
| 2184 | +// } |
| 2185 | + |
| 2186 | +// template<typename RegAllocPass> |
| 2187 | +// void AMDGPUCodeGenPassBuilder::addRegAllocOfType(AddMachinePass &addPass, |
| 2188 | +// RegAllocPass::Options Options) { |
| 2189 | +// addPass(RegAllocPass(Options)); |
| 2190 | +// } |
| 2191 | + |
| 2192 | +// this is the final method |
| 2193 | +// template<typename RegAllocPass> |
| 2194 | +// void AMDGPUCodeGenPassBuilder::addRegAllocOfType(AddMachinePass &addPass, |
| 2195 | +// RegAllocPhase Phase) { |
| 2196 | +// #define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \ |
| 2197 | +// [&]() { \ |
| 2198 | +// if constexpr (std::is_same_v<RegAllocPass, RegAllocFastPass>) { \ |
| 2199 | +// return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \ |
| 2200 | +// } else { \ |
| 2201 | +// return typename RegAllocPass::Options{FilterFunc, Name}; \ |
| 2202 | +// } \ |
| 2203 | +// }() |
| 2204 | + |
| 2205 | +// typename RegAllocPass::Options Options; |
| 2206 | +// RegAllocType RAType; |
| 2207 | + |
| 2208 | +// switch (Phase) { |
| 2209 | +// case RegAllocPhase::SGPR: |
| 2210 | +// Options = RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false); |
| 2211 | +// RAType = SGPRRegAllocTypeNPM; |
| 2212 | +// break; |
| 2213 | +// case RegAllocPhase::WWM: |
| 2214 | +// Options = RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false); |
| 2215 | +// RAType = WWMRegAllocTypeNPM; |
| 2216 | +// break; |
| 2217 | +// case RegAllocPhase::VGPR: |
| 2218 | +// Options = RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true); |
| 2219 | +// RAType = VGPRRegAllocTypeNPM; |
| 2220 | +// break; |
| 2221 | +// }; |
| 2222 | + |
| 2223 | +// switch(RAType) { |
| 2224 | +// case RegAllocType::Greedy: |
| 2225 | +// addPass(RAGreedyPass(Options)); |
| 2226 | +// return; |
| 2227 | +// case RegAllocType::Fast: |
| 2228 | +// addPass(RegAllocFastPass(Options)); |
| 2229 | +// return; |
| 2230 | +// case RegAllocType::Unset: |
| 2231 | +// addPass(RegAllocPass(Options)); |
| 2232 | +// } |
| 2233 | +// #undef RA_OPTIONS |
| 2234 | +// } |
| 2235 | + |
| 2236 | +// template<typename RegAllocPass> |
| 2237 | +// void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass, |
| 2238 | +// RegAllocPhase Phase) { |
| 2239 | +// RegAllocType RAType; |
| 2240 | +// switch(Phase) { |
| 2241 | +// case RegAllocPhase::SGPR: |
| 2242 | +// RAType = SGPRRegAllocTypeNPM; |
| 2243 | +// break; |
| 2244 | +// case RegAllocPhase::WWM: |
| 2245 | +// RAType = WWMRegAllocTypeNPM; |
| 2246 | +// break; |
| 2247 | +// case RegAllocPhase::VGPR: |
| 2248 | +// RAType = VGPRRegAllocTypeNPM; |
| 2249 | +// break; |
| 2250 | +// } |
| 2251 | +// switch (RAType) { |
| 2252 | +// case RegAllocType::Greedy: |
| 2253 | +// addRegAllocOfType(addPass, Phase); |
| 2254 | +// } |
| 2255 | +// addRegAllocOfType<RegAllocPass>(addPass, Phase); |
| 2256 | +// } |
| 2257 | + |
| 2258 | +template <typename RegAllocPassT> |
| 2259 | +typename RegAllocPassT::Options |
| 2260 | +AMDGPUCodeGenPassBuilder::getRAOptionsForPhase(RegAllocPhase Phase) const { |
| 2261 | +#define RA_OPTIONS(FilterFunc, Name, ClearVirtRegs) \ |
| 2262 | + [&]() { \ |
| 2263 | + if constexpr (std::is_same_v<RegAllocPassT, RegAllocFastPass>) { \ |
| 2264 | + return RegAllocFastPass::Options{FilterFunc, Name, ClearVirtRegs}; \ |
| 2265 | + } else { \ |
| 2266 | + return typename RegAllocPassT::Options{FilterFunc, Name}; \ |
| 2267 | + } \ |
| 2268 | + }() |
| 2269 | + |
| 2270 | + switch (Phase) { |
| 2271 | + case RegAllocPhase::SGPR: |
| 2272 | + return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false); |
| 2273 | + case RegAllocPhase::WWM: |
| 2274 | + return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false); |
| 2275 | + case RegAllocPhase::VGPR: |
| 2276 | + return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true); |
| 2277 | + } |
| 2278 | + // static_assert(std::is_same_v<PhaseT, SGPRPhase> || |
| 2279 | + // std::is_same_v<PhaseT, WWMPhase> || |
| 2280 | + // std::is_same_v<PhaseT, VGPRPhase>, |
| 2281 | + // "Unsupported phase type"); |
| 2282 | + |
| 2283 | + // if constexpr(std::is_same_v<PhaseT, SGPRPhase>) { |
| 2284 | + // return RA_OPTIONS(onlyAllocateSGPRs, "sgpr", false); |
| 2285 | + // } else if constexpr(std::is_same_v<PhaseT, WWMPhase>) { |
| 2286 | + // return RA_OPTIONS(onlyAllocateWWMRegs, "wwm", false); |
| 2287 | + // } else if constexpr(std::is_same_v<PhaseT, VGPRPhase>) { |
| 2288 | + // return RA_OPTIONS(onlyAllocateVGPRs, "vgpr", true); |
| 2289 | + // } |
| 2290 | + |
| 2291 | +#undef RA_OPTIONS |
| 2292 | +} |
| 2293 | + |
| 2294 | +template <typename RegAllocPassT> |
| 2295 | +void AMDGPUCodeGenPassBuilder::addRegAlloc(AddMachinePass &addPass, |
| 2296 | + RegAllocPhase Phase) const { |
| 2297 | + RegAllocType RAType; |
| 2298 | + // Read the appropriate phase's regalloc type. |
| 2299 | + switch (Phase) { |
| 2300 | + case RegAllocPhase::SGPR: |
| 2301 | + RAType = SGPRRegAllocTypeNPM; |
| 2302 | + break; |
| 2303 | + case RegAllocPhase::WWM: |
| 2304 | + RAType = WWMRegAllocTypeNPM; |
| 2305 | + break; |
| 2306 | + case RegAllocPhase::VGPR: |
| 2307 | + RAType = VGPRRegAllocTypeNPM; |
| 2308 | + break; |
| 2309 | + } |
| 2310 | + |
| 2311 | + // Construct the pass with the appropriate options. |
| 2312 | + switch (RAType) { |
| 2313 | + case RegAllocType::Greedy: |
| 2314 | + addPass(RAGreedyPass(getRAOptionsForPhase<RAGreedyPass>(Phase))); |
| 2315 | + return; |
| 2316 | + case RegAllocType::Fast: |
| 2317 | + addPass(RegAllocFastPass(getRAOptionsForPhase<RegAllocFastPass>(Phase))); |
| 2318 | + return; |
| 2319 | + case RegAllocType::Unset: |
| 2320 | + addPass(RegAllocPassT(getRAOptionsForPhase<RegAllocPassT>(Phase))); |
| 2321 | + return; |
| 2322 | + default: |
| 2323 | + report_fatal_error("Unsupported regalloc type", false); |
| 2324 | + } |
| 2325 | +} |
| 2326 | + |
| 2327 | +Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( |
| 2328 | + AddMachinePass &addPass) const { |
| 2329 | + if (Opt.RegAlloc != RegAllocType::Unset) |
| 2330 | + return make_error<StringError>(NPMRegAllocOptNotSupportedMessage, |
| 2331 | + inconvertibleErrorCode()); |
| 2332 | + |
| 2333 | + addPass(GCNPreRALongBranchRegPass()); |
| 2334 | + |
| 2335 | + addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::SGPR); |
| 2336 | + |
| 2337 | + // Commit allocated register changes. This is mostly necessary because too |
| 2338 | + // many things rely on the use lists of the physical registers, such as the |
| 2339 | + // verifier. This is only necessary with allocators which use LiveIntervals, |
| 2340 | + // since FastRegAlloc does the replacements itself. |
| 2341 | + // TODO: addPass(VirtRegRewriterPass(false)); |
| 2342 | + |
| 2343 | + // At this point, the sgpr-regalloc has been done and it is good to have the |
| 2344 | + // stack slot coloring to try to optimize the SGPR spill stack indices before |
| 2345 | + // attempting the custom SGPR spill lowering. |
| 2346 | + addPass(StackSlotColoringPass()); |
| 2347 | + |
| 2348 | + // Equivalent of PEI for SGPRs. |
| 2349 | + addPass(SILowerSGPRSpillsPass()); |
| 2350 | + |
| 2351 | + // To Allocate wwm registers used in whole quad mode operations (for shaders). |
| 2352 | + addPass(SIPreAllocateWWMRegsPass()); |
| 2353 | + |
| 2354 | + // For allocating other wwm register operands. |
| 2355 | + addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::WWM); |
| 2356 | + addPass(SILowerWWMCopiesPass()); |
| 2357 | + // TODO: addPass(VirtRegRewriterPass(false)); |
| 2358 | + // TODO: addPass(AMDGPUReserveWWMRegsPass()); |
| 2359 | + |
| 2360 | + // For allocating per-thread VGPRs. |
| 2361 | + addRegAlloc<RAGreedyPass>(addPass, RegAllocPhase::VGPR); |
| 2362 | + |
| 2363 | + // TODO: addPreRewrite(); |
| 2364 | + addPass(VirtRegRewriterPass(false)); |
| 2365 | + |
| 2366 | + // TODO: addPass(AMDGPUMarkLastScratchLoadPass()); |
| 2367 | + return Error::success(); |
| 2368 | +} |
| 2369 | + |
2143 | 2370 | void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
|
2144 | 2371 | addPass(SIFixVGPRCopiesPass());
|
2145 | 2372 | if (TM.getOptLevel() > CodeGenOptLevel::None)
|
|
0 commit comments