16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " AMDGPU.h"
19
+ #include " AMDGPUGlobalISelUtils.h"
20
+ #include " GCNSubtarget.h"
21
+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
19
22
#include " llvm/CodeGen/MachineFunctionPass.h"
23
+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
20
24
#include " llvm/InitializePasses.h"
21
25
22
26
#define DEBUG_TYPE " amdgpu-regbankselect"
@@ -40,6 +44,7 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
40
44
}
41
45
42
46
/// Declare the analyses this pass depends on.
///
/// Registers MachineUniformityAnalysisPass as a required analysis and then
/// forwards to the MachineFunctionPass implementation.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  // Uniformity information is requested first, before the base class gets a
  // chance to add its own entries to AU.
  AU.addRequired<MachineUniformityAnalysisPass>();

  MachineFunctionPass::getAnalysisUsage(AU);
}
45
50
@@ -55,6 +60,7 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
55
60
56
61
INITIALIZE_PASS_BEGIN (AMDGPURegBankSelect, DEBUG_TYPE,
57
62
" AMDGPU Register Bank Select" , false , false )
63
+ INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
58
64
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
59
65
" AMDGPU Register Bank Select" , false , false )
60
66
@@ -66,9 +72,220 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
66
72
return new AMDGPURegBankSelect ();
67
73
}
68
74
75
+ class RegBankSelectHelper {
76
+ MachineFunction &MF;
77
+ MachineIRBuilder &B;
78
+ MachineRegisterInfo &MRI;
79
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
80
+ const MachineUniformityInfo &MUI;
81
+ const SIRegisterInfo &TRI;
82
+ const RegisterBank *SgprRB;
83
+ const RegisterBank *VgprRB;
84
+ const RegisterBank *VccRB;
85
+
86
+ public:
87
+ RegBankSelectHelper (MachineFunction &MF, MachineIRBuilder &B,
88
+ MachineRegisterInfo &MRI,
89
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
90
+ const MachineUniformityInfo &MUI,
91
+ const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
92
+ : MF(MF), B(B), MRI(MRI), ILMA(ILMA), MUI(MUI), TRI(TRI),
93
+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
94
+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
95
+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
96
+
97
+ bool shouldRegBankSelect (MachineInstr &MI) {
98
+ return MI.isPreISelOpcode () || MI.isCopy ();
99
+ }
100
+
101
+ void setRBDef (MachineInstr &MI, MachineOperand &DefOP,
102
+ const RegisterBank *RB) {
103
+ Register Reg = DefOP.getReg ();
104
+ // Register that already has Register class got it during pre-inst selection
105
+ // of another instruction. Maybe cross bank copy was required so we insert a
106
+ // copy that can be removed later. This simplifies post-rb-legalize artifact
107
+ // combiner and avoids need to special case some patterns.
108
+ if (MRI.getRegClassOrNull (Reg)) {
109
+ LLT Ty = MRI.getType (Reg);
110
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
111
+ DefOP.setReg (NewReg);
112
+
113
+ auto &MBB = *MI.getParent ();
114
+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
115
+ B.buildCopy (Reg, NewReg);
116
+
117
+ // The problem was discovered for uniform S1 that was used as both
118
+ // lane mask(vcc) and regular sgpr S1.
119
+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
120
+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
121
+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
122
+ // - the regular sgpr S1(uniform) instruction is now broken since
123
+ // it uses sreg_64_xexec(S1) which is divergent.
124
+
125
+ // "Clear" reg classes from uses on generic instructions and put register
126
+ // banks instead.
127
+ for (auto &UseMI : MRI.use_instructions (Reg)) {
128
+ if (shouldRegBankSelect (UseMI)) {
129
+ for (MachineOperand &Op : UseMI.operands ()) {
130
+ if (Op.isReg () && Op.getReg () == Reg)
131
+ Op.setReg (NewReg);
132
+ }
133
+ }
134
+ }
135
+
136
+ } else {
137
+ MRI.setRegBank (Reg, *RB);
138
+ }
139
+ }
140
+
141
+ void constrainRBUse (MachineInstr &MI, MachineOperand &UseOP,
142
+ const RegisterBank *RB) {
143
+ Register Reg = UseOP.getReg ();
144
+
145
+ LLT Ty = MRI.getType (Reg);
146
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
147
+ UseOP.setReg (NewReg);
148
+
149
+ if (MI.isPHI ()) {
150
+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
151
+ MachineBasicBlock *DefMBB = DefMI->getParent ();
152
+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
153
+ } else {
154
+ B.setInstr (MI);
155
+ }
156
+
157
+ B.buildCopy (NewReg, Reg);
158
+ }
159
+
160
+ std::optional<Register> tryGetVReg (MachineOperand &Op) {
161
+ if (!Op.isReg ())
162
+ return std::nullopt;
163
+
164
+ Register Reg = Op.getReg ();
165
+ if (!Reg.isVirtual ())
166
+ return std::nullopt;
167
+
168
+ return Reg;
169
+ }
170
+
171
+ void assignBanksOnDefs () {
172
+ for (MachineBasicBlock &MBB : MF) {
173
+ for (MachineInstr &MI : MBB) {
174
+ if (!shouldRegBankSelect (MI))
175
+ continue ;
176
+
177
+ for (MachineOperand &DefOP : MI.defs ()) {
178
+ auto MaybeDefReg = tryGetVReg (DefOP);
179
+ if (!MaybeDefReg)
180
+ continue ;
181
+ Register DefReg = *MaybeDefReg;
182
+
183
+ // Copies can have register class on def registers.
184
+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
185
+ continue ;
186
+ }
187
+
188
+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
189
+ setRBDef (MI, DefOP, SgprRB);
190
+ } else {
191
+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
192
+ setRBDef (MI, DefOP, VccRB);
193
+ else
194
+ setRBDef (MI, DefOP, VgprRB);
195
+ }
196
+ }
197
+ }
198
+ }
199
+ }
200
+
201
+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
202
+ // the cycle
203
+ // Note: uniformity analysis does not consider that registers with vgpr def
204
+ // are divergent (you can have uniform value in vgpr).
205
+ // - TODO: implicit use of $exec could be implemented as indicator that
206
+ // instruction is divergent
207
+ bool isTemporalDivergenceCopy (Register Reg) {
208
+ MachineInstr *MI = MRI.getVRegDef (Reg);
209
+ if (!MI->isCopy ())
210
+ return false ;
211
+
212
+ for (auto Op : MI->implicit_operands ()) {
213
+ if (!Op.isReg ())
214
+ continue ;
215
+
216
+ if (Op.getReg () == TRI.getExec ()) {
217
+ return true ;
218
+ }
219
+ }
220
+
221
+ return false ;
222
+ }
223
+
224
+ void constrainBanksOnUses () {
225
+ for (MachineBasicBlock &MBB : MF) {
226
+ for (MachineInstr &MI : MBB) {
227
+ if (!shouldRegBankSelect (MI))
228
+ continue ;
229
+
230
+ // Copies can have register class on use registers.
231
+ if (MI.isCopy ())
232
+ continue ;
233
+
234
+ for (MachineOperand &UseOP : MI.uses ()) {
235
+ auto MaybeUseReg = tryGetVReg (UseOP);
236
+ if (!MaybeUseReg)
237
+ continue ;
238
+ Register UseReg = *MaybeUseReg;
239
+
240
+ // UseReg already has register bank.
241
+ if (MRI.getRegBankOrNull (UseReg))
242
+ continue ;
243
+
244
+ if (!isTemporalDivergenceCopy (UseReg) &&
245
+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
246
+ constrainRBUse (MI, UseOP, SgprRB);
247
+ } else {
248
+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
249
+ constrainRBUse (MI, UseOP, VccRB);
250
+ else
251
+ constrainRBUse (MI, UseOP, VgprRB);
252
+ }
253
+ }
254
+ }
255
+ }
256
+ }
257
+ };
258
+
69
259
/// Run register bank selection on \p MF.
///
/// Returns false without touching the function when instruction selection has
/// already failed for it; otherwise assigns register banks on the defs and
/// uses of generic instructions and returns true.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelFailed)
    return false;

  // Gather the analyses and target information the helper works with.
  MachineUniformityInfo &Uniformity =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  AMDGPU::IntrinsicLaneMaskAnalyzer LaneMasks(MF);
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  const SIRegisterInfo &SIRegInfo =
      *MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const RegisterBankInfo &BankInfo = *MF.getSubtarget().getRegBankInfo();

  MachineIRBuilder Builder(MF);
  RegBankSelectHelper Helper(MF, Builder, RegInfo, LaneMasks, Uniformity,
                             SIRegInfo, BankInfo);

  // Step 1: every def register of a G_ instruction gets a register bank.
  // Copies are handled the same way when their def has neither a register
  // bank nor a register class.
  Helper.assignBanksOnDefs();

  // After step 1 each virtual register has either a register class or a bank:
  // - defs of G_ instructions have register banks,
  // - defs and uses of inst-selected instructions have register classes,
  // - defs and uses of copies may have either,
  // - and, most notably, uses of G_ instructions may still have either.

  // Step 2: rewrite the uses of G_ instructions so they only have register
  // banks.
  Helper.constrainBanksOnUses();

  // From here on, defs and uses of G_ instructions have register banks
  // exclusively.
  return true;
}
0 commit comments