Skip to content

Commit ff34aa1

Browse files
AMDGPU/GlobalISel: Add skeletons for new register bank select passes
New register bank select for AMDGPU will be split in two passes: RBSelect: select banks based on machine uniformity analysis. RBLegalize: lower instructions that can't be inst-selected with the register banks assigned by RBSelect. It does not change already assigned banks. It is similar to the legalizer but has the context of uniformity analysis. RBLegalize's main goal is to provide a high-level, table-like overview of how to lower generic instructions based on available target features and uniformity info (uniform vs divergent). See RegBankLegalizeRules. Summary of new features: At the moment, reg bank select assigns a register bank to the output register using a simple algorithm: - if one of the inputs is vgpr, the output is vgpr - if all inputs are sgpr, the output is sgpr. When a function does not contain divergent control flow, propagating reg banks like this works. In general, the first point is still correct, but the second is not when the function contains divergent control flow. Examples: - Phi with uniform inputs that go through a divergent branch - Instruction with a temporal divergent use. To fix this, RB-select will use machine uniformity analysis to assign vgpr to each divergent and sgpr to each uniform instruction. But some instructions are only available on VALU (for example floating point instructions before gfx1150) and we need to assign vgpr to them. Since we are no longer propagating reg banks, we need to ensure that uniform instructions get their inputs in sgpr in some way. In RB-legalize, uniform instructions that are only available on VALU will be reassigned to vgpr on all operands, and the vgpr output will be copied with readfirstlane to the original sgpr output.
1 parent abfba7d commit ff34aa1

11 files changed

+4037
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ FunctionPass *createSIFoldOperandsLegacyPass();
3939
FunctionPass *createSIPeepholeSDWALegacyPass();
4040
FunctionPass *createSILowerI1CopiesLegacyPass();
4141
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
42+
FunctionPass *createAMDGPURBSelectPass();
43+
FunctionPass *createAMDGPURBLegalizePass();
4244
FunctionPass *createSIShrinkInstructionsLegacyPass();
4345
FunctionPass *createSILoadStoreOptimizerLegacyPass();
4446
FunctionPass *createSIWholeQuadModePass();
@@ -188,6 +190,12 @@ extern char &SILowerI1CopiesLegacyID;
188190
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
189191
extern char &AMDGPUGlobalISelDivergenceLoweringID;
190192

193+
void initializeAMDGPURBSelectPass(PassRegistry &);
194+
extern char &AMDGPURBSelectID;
195+
196+
void initializeAMDGPURBLegalizePass(PassRegistry &);
197+
extern char &AMDGPURBLegalizeID;
198+
191199
void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
192200
extern char &AMDGPUMarkLastScratchLoadID;
193201

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
//===-- AMDGPURBLegalize.cpp ----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// Lower G_ instructions that can't be inst-selected with register bank
10+
/// assignment given by RB-select based on machine uniformity info.
11+
/// Given types on all operands, some register bank assignments require lowering
12+
/// while others do not.
13+
/// Note: cases where all register bank assignments would require lowering are
14+
/// lowered in legalizer.
15+
/// For example vgpr S64 G_AND requires lowering to S32 while SGPR S64 does not.
16+
/// Eliminate sgpr S1 by lowering to sgpr S32.
17+
//
18+
//===----------------------------------------------------------------------===//
19+
20+
#include "AMDGPU.h"
21+
#include "llvm/CodeGen/MachineFunctionPass.h"
22+
#include "llvm/InitializePasses.h"
23+
24+
#define DEBUG_TYPE "rb-legalize"
25+
26+
using namespace llvm;
27+
28+
namespace {
29+
30+
class AMDGPURBLegalize : public MachineFunctionPass {
31+
public:
32+
static char ID;
33+
34+
public:
35+
AMDGPURBLegalize() : MachineFunctionPass(ID) {
36+
initializeAMDGPURBLegalizePass(*PassRegistry::getPassRegistry());
37+
}
38+
39+
bool runOnMachineFunction(MachineFunction &MF) override;
40+
41+
StringRef getPassName() const override { return "AMDGPU RB Legalize"; }
42+
43+
void getAnalysisUsage(AnalysisUsage &AU) const override {
44+
MachineFunctionPass::getAnalysisUsage(AU);
45+
}
46+
47+
// If there were no phis and we do waterfall expansion machine verifier would
48+
// fail.
49+
MachineFunctionProperties getClearedProperties() const override {
50+
return MachineFunctionProperties().set(
51+
MachineFunctionProperties::Property::NoPHIs);
52+
}
53+
};
54+
55+
} // End anonymous namespace.
56+
57+
INITIALIZE_PASS_BEGIN(AMDGPURBLegalize, DEBUG_TYPE, "AMDGPU RB Legalize", false,
58+
false)
59+
INITIALIZE_PASS_END(AMDGPURBLegalize, DEBUG_TYPE, "AMDGPU RB Legalize", false,
60+
false)
61+
62+
char AMDGPURBLegalize::ID = 0;
63+
64+
char &llvm::AMDGPURBLegalizeID = AMDGPURBLegalize::ID;
65+
66+
/// Factory for the RB-legalize pass; returns a freshly allocated instance.
FunctionPass *llvm::createAMDGPURBLegalizePass() {
  FunctionPass *Pass = new AMDGPURBLegalize();
  return Pass;
}
69+
70+
using namespace AMDGPU;
71+
72+
/// Placeholder entry point: no lowering is implemented yet. \p MF is not
/// touched and the pass unconditionally reports the function as changed.
bool AMDGPURBLegalize::runOnMachineFunction(MachineFunction &MF) {
  return true;
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//===-- AMDGPURBSelect.cpp ------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
/// Assign register banks to all register operands of G_ instructions using
10+
/// machine uniformity analysis.
11+
/// SGPR - uniform values and some lane masks
12+
/// VGPR - divergent, non S1, values
13+
/// VCC - divergent S1 values (lane masks)
14+
/// However in some cases G_ instructions with this register bank assignment
15+
/// can't be inst-selected. This is solved in RBLegalize.
16+
//===----------------------------------------------------------------------===//
17+
18+
#include "AMDGPU.h"
19+
#include "llvm/CodeGen/MachineFunctionPass.h"
20+
#include "llvm/InitializePasses.h"
21+
22+
#define DEBUG_TYPE "rb-select"
23+
24+
using namespace llvm;
25+
26+
namespace {
27+
28+
class AMDGPURBSelect : public MachineFunctionPass {
29+
public:
30+
static char ID;
31+
32+
public:
33+
AMDGPURBSelect() : MachineFunctionPass(ID) {
34+
initializeAMDGPURBSelectPass(*PassRegistry::getPassRegistry());
35+
}
36+
37+
bool runOnMachineFunction(MachineFunction &MF) override;
38+
39+
StringRef getPassName() const override { return "AMDGPU RB select"; }
40+
41+
void getAnalysisUsage(AnalysisUsage &AU) const override {
42+
MachineFunctionPass::getAnalysisUsage(AU);
43+
}
44+
45+
// This pass assigns register banks to all virtual registers, and we maintain
46+
// this property in subsequent passes
47+
MachineFunctionProperties getSetProperties() const override {
48+
return MachineFunctionProperties().set(
49+
MachineFunctionProperties::Property::RegBankSelected);
50+
}
51+
};
52+
53+
} // End anonymous namespace.
54+
55+
INITIALIZE_PASS_BEGIN(AMDGPURBSelect, DEBUG_TYPE, "AMDGPU RB select", false,
56+
false)
57+
INITIALIZE_PASS_END(AMDGPURBSelect, DEBUG_TYPE, "AMDGPU RB select", false,
58+
false)
59+
60+
char AMDGPURBSelect::ID = 0;
61+
62+
char &llvm::AMDGPURBSelectID = AMDGPURBSelect::ID;
63+
64+
/// Factory for the RB-select pass; returns a freshly allocated instance.
FunctionPass *llvm::createAMDGPURBSelectPass() {
  FunctionPass *Pass = new AMDGPURBSelect();
  return Pass;
}
65+
66+
/// Placeholder entry point: no bank assignment is implemented yet. \p MF is
/// not touched and the pass unconditionally reports the function as changed.
bool AMDGPURBSelect::runOnMachineFunction(MachineFunction &MF) {
  return true;
}

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,11 @@ static cl::opt<bool>
443443
cl::desc("Enable AMDGPUAttributorPass"),
444444
cl::init(true), cl::Hidden);
445445

446+
// Hidden command-line switch, off by default, that selects the rb-select +
// rb-legalize pipeline in place of amdgpu-regbankselect.
static cl::opt<bool> NewRegBankSelect(
    "new-reg-bank-select", cl::Hidden, cl::init(false),
    cl::desc("Run rb-select and rb-legalize instead of amdgpu-regbankselect"));
450+
446451
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
447452
// Register the target
448453
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -459,6 +464,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
459464
initializeGCNDPPCombineLegacyPass(*PR);
460465
initializeSILowerI1CopiesLegacyPass(*PR);
461466
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
467+
initializeAMDGPURBSelectPass(*PR);
468+
initializeAMDGPURBLegalizePass(*PR);
462469
initializeSILowerWWMCopiesPass(*PR);
463470
initializeAMDGPUMarkLastScratchLoadPass(*PR);
464471
initializeSILowerSGPRSpillsLegacyPass(*PR);
@@ -1371,7 +1378,12 @@ void GCNPassConfig::addPreRegBankSelect() {
13711378
}
13721379

13731380
bool GCNPassConfig::addRegBankSelect() {
1374-
addPass(new AMDGPURegBankSelect());
1381+
if (NewRegBankSelect) {
1382+
addPass(createAMDGPURBSelectPass());
1383+
addPass(createAMDGPURBLegalizePass());
1384+
} else {
1385+
addPass(new AMDGPURegBankSelect());
1386+
}
13751387
return false;
13761388
}
13771389

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ add_llvm_target(AMDGPUCodeGen
5757
AMDGPUExportClustering.cpp
5858
AMDGPUFrameLowering.cpp
5959
AMDGPUGlobalISelDivergenceLowering.cpp
60+
AMDGPURBSelect.cpp
61+
AMDGPURBLegalize.cpp
6062
AMDGPUGlobalISelUtils.cpp
6163
AMDGPUHSAMetadataStreamer.cpp
6264
AMDGPUInsertDelayAlu.cpp

0 commit comments

Comments
 (0)