16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " AMDGPU.h"
19
+ #include " AMDGPUGlobalISelUtils.h"
20
+ #include " GCNSubtarget.h"
21
+ #include " llvm/CodeGen/GlobalISel/CSEInfo.h"
22
+ #include " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
23
+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
19
24
#include " llvm/CodeGen/MachineFunctionPass.h"
25
+ #include " llvm/CodeGen/MachineInstr.h"
26
+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
27
+ #include " llvm/CodeGen/TargetPassConfig.h"
20
28
#include " llvm/InitializePasses.h"
21
29
22
30
#define DEBUG_TYPE " amdgpu-regbankselect"
23
31
24
32
using namespace llvm ;
33
+ using namespace AMDGPU ;
25
34
26
35
namespace {
27
36
@@ -40,6 +49,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
40
49
}
41
50
42
51
/// Declare the analyses this pass requires: TargetPassConfig,
/// GISelCSEAnalysisWrapperPass and MachineUniformityAnalysisPass. The
/// requirements of the MachineFunctionPass base class are added last.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addRequired<MachineUniformityAnalysisPass>();
  MachineFunctionPass::getAnalysisUsage(AU);
}
45
57
@@ -55,6 +67,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
55
67
56
68
// Pass registration. The dependencies listed here mirror the analyses that
// getAnalysisUsage() marks as required.
INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
                      "AMDGPU Register Bank Select", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
                    "AMDGPU Register Bank Select", false, false)
60
75
@@ -66,9 +81,191 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
66
81
return new AMDGPURegBankSelect ();
67
82
}
68
83
84
+ class RegBankSelectHelper {
85
+ MachineIRBuilder &B;
86
+ MachineRegisterInfo &MRI;
87
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
88
+ const MachineUniformityInfo &MUI;
89
+ const RegisterBank *SgprRB;
90
+ const RegisterBank *VgprRB;
91
+ const RegisterBank *VccRB;
92
+
93
+ public:
94
+ RegBankSelectHelper (MachineIRBuilder &B,
95
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
96
+ const MachineUniformityInfo &MUI,
97
+ const RegisterBankInfo &RBI)
98
+ : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI),
99
+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
100
+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
101
+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
102
+
103
+ void setRegBankDef (MachineInstr &MI, MachineOperand &DefOP,
104
+ const RegisterBank *RB) {
105
+ Register Reg = DefOP.getReg ();
106
+
107
+ if (!MRI.getRegClassOrNull (Reg)) {
108
+ MRI.setRegBank (Reg, *RB);
109
+ return ;
110
+ }
111
+
112
+ // Register that already has Register class got it during pre-inst selection
113
+ // of another instruction. Maybe cross bank copy was required so we insert a
114
+ // copy that can be removed later. This simplifies post regbanklegalize
115
+ // combiner and avoids need to special case some patterns.
116
+ LLT Ty = MRI.getType (Reg);
117
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
118
+ DefOP.setReg (NewReg);
119
+
120
+ auto &MBB = *MI.getParent ();
121
+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
122
+ B.buildCopy (Reg, NewReg);
123
+
124
+ // The problem was discovered for uniform S1 that was used as both
125
+ // lane mask(vcc) and regular sgpr S1.
126
+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
127
+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
128
+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
129
+ // - the regular sgpr S1(uniform) instruction is now broken since
130
+ // it uses sreg_64_xexec(S1) which is divergent.
131
+
132
+ // Replace virtual registers with register class on generic instructions
133
+ // uses with virtual registers with register bank.
134
+ for (auto &UseMI : make_early_inc_range (MRI.use_instructions (Reg))) {
135
+ if (UseMI.isPreISelOpcode ()) {
136
+ for (MachineOperand &Op : UseMI.operands ()) {
137
+ if (Op.isReg () && Op.getReg () == Reg)
138
+ Op.setReg (NewReg);
139
+ }
140
+ }
141
+ }
142
+ }
143
+
144
+ Register tryGetVReg (MachineOperand &Op) {
145
+ if (!Op.isReg ())
146
+ return {};
147
+
148
+ Register Reg = Op.getReg ();
149
+ if (!Reg.isVirtual ())
150
+ return {};
151
+
152
+ return Reg;
153
+ }
154
+
155
+ void assignBanksOnDefs (MachineInstr &MI) {
156
+ for (MachineOperand &DefOP : MI.defs ()) {
157
+ Register DefReg = tryGetVReg (DefOP);
158
+ if (!DefReg.isValid ())
159
+ continue ;
160
+
161
+ // Copies can have register class on def registers.
162
+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
163
+ continue ;
164
+ }
165
+
166
+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
167
+ setRegBankDef (MI, DefOP, SgprRB);
168
+ } else {
169
+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
170
+ setRegBankDef (MI, DefOP, VccRB);
171
+ else
172
+ setRegBankDef (MI, DefOP, VgprRB);
173
+ }
174
+ }
175
+ }
176
+
177
+ void constrainRegBankUse (MachineInstr &MI, MachineOperand &UseOP,
178
+ const RegisterBank *RB) {
179
+ Register Reg = UseOP.getReg ();
180
+
181
+ LLT Ty = MRI.getType (Reg);
182
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
183
+ UseOP.setReg (NewReg);
184
+
185
+ if (MI.isPHI ()) {
186
+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
187
+ MachineBasicBlock *DefMBB = DefMI->getParent ();
188
+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
189
+ } else {
190
+ B.setInstr (MI);
191
+ }
192
+
193
+ B.buildCopy (NewReg, Reg);
194
+ }
195
+
196
+ void constrainBanksOnUses (MachineInstr &MI) {
197
+ for (MachineOperand &UseOP : MI.uses ()) {
198
+ auto UseReg = tryGetVReg (UseOP);
199
+ if (!UseReg.isValid ())
200
+ continue ;
201
+
202
+ // UseReg already has register bank.
203
+ if (MRI.getRegBankOrNull (UseReg))
204
+ continue ;
205
+
206
+ if (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg)) {
207
+ constrainRegBankUse (MI, UseOP, SgprRB);
208
+ } else {
209
+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
210
+ constrainRegBankUse (MI, UseOP, VccRB);
211
+ else
212
+ constrainRegBankUse (MI, UseOP, VgprRB);
213
+ }
214
+ }
215
+ }
216
+ };
217
+
69
218
/// Assign register banks to the defs and uses of all generic (G_)
/// instructions in \p MF.
///
/// Returns false without touching the function if instruction selection has
/// already failed for it, and true otherwise.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelFailed)
    return false;

  // Set up the instruction builder with CSE.
  const TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  GISelCSEInfo &CSEInfo = CSEWrapper.get(PassConfig.getCSEConfig());
  GISelObserverWrapper Observer;
  Observer.addObserver(&CSEInfo);

  CSEMIRBuilder B(MF);
  B.setCSEInfo(&CSEInfo);
  B.setChangeObserver(Observer);

  RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
  RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);

  IntrinsicLaneMaskAnalyzer ILMA(MF);
  MachineUniformityInfo &MUI =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  RegBankSelectHelper RBSHelper(B, ILMA, MUI, *Subtarget.getRegBankInfo());

  // First walk: assign register banks to ALL def registers on G_
  // instructions. The same is done for copies whose def has neither a
  // register bank nor a register class.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (!MI.isPreISelOpcode() && !MI.isCopy())
        continue;
      RBSHelper.assignBanksOnDefs(MI);
    }
  }

  // At this point all virtual registers have a register class or bank:
  // - Defs of G_ instructions have register banks.
  // - Defs and uses of inst-selected instructions have register class.
  // - Defs and uses of copies can have either register class or bank.
  // and most notably:
  // - Uses of G_ instructions can have either register class or bank.

  // Second walk: reassign use registers of G_ instructions so that they only
  // have register banks.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // Copies are skipped since they can have register class on use
      // registers.
      if (!MI.isPreISelOpcode())
        continue;
      RBSHelper.constrainBanksOnUses(MI);
    }
  }

  // Defs and uses of G_ instructions have register banks exclusively.
  return true;
}
0 commit comments