16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "AMDGPURegisterBankInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/InitializePasses.h"
21
26
22
27
#define DEBUG_TYPE " rb-select"
@@ -39,6 +44,7 @@ class AMDGPURBSelect : public MachineFunctionPass {
39
44
StringRef getPassName () const override { return " AMDGPU RB select" ; }
40
45
41
46
  // Request machine uniformity info; the pass queries it in
  // runOnMachineFunction to pick sgpr vs vgpr/vcc banks.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineUniformityAnalysisPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
44
50
@@ -54,6 +60,7 @@ class AMDGPURBSelect : public MachineFunctionPass {
54
60
55
61
// Legacy pass-manager registration. The DEPENDENCY line matches the
// addRequired<MachineUniformityAnalysisPass>() in getAnalysisUsage so the
// analysis is initialized before this pass runs.
INITIALIZE_PASS_BEGIN(AMDGPURBSelect, DEBUG_TYPE, "AMDGPU RB select", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPURBSelect, DEBUG_TYPE, "AMDGPU RB select", false,
                    false)
59
66
@@ -63,4 +70,189 @@ char &llvm::AMDGPURBSelectID = AMDGPURBSelect::ID;
63
70
64
71
FunctionPass *llvm::createAMDGPURBSelectPass () { return new AMDGPURBSelect (); }
65
72
66
- bool AMDGPURBSelect::runOnMachineFunction (MachineFunction &MF) { return true ; }
73
+ bool shouldRBSelect (MachineInstr &MI) {
74
+ if (isTargetSpecificOpcode (MI.getOpcode ()) && !MI.isPreISelOpcode ())
75
+ return false ;
76
+
77
+ if (MI.getOpcode () == AMDGPU::PHI || MI.getOpcode () == AMDGPU::IMPLICIT_DEF)
78
+ return false ;
79
+
80
+ if (MI.isInlineAsm ())
81
+ return false ;
82
+
83
+ return true ;
84
+ }
85
+
86
+ void setRB (MachineInstr &MI, MachineOperand &DefOP, MachineIRBuilder B,
87
+ MachineRegisterInfo &MRI, const RegisterBank &RB) {
88
+ Register Reg = DefOP.getReg ();
89
+ // Register that already has Register class got it during pre-inst selection
90
+ // of another instruction. Maybe cross bank copy was required so we insert a
91
+ // copy trat can be removed later. This simplifies post-rb-legalize artifact
92
+ // combiner and avoids need to special case some patterns.
93
+ if (MRI.getRegClassOrNull (Reg)) {
94
+ LLT Ty = MRI.getType (Reg);
95
+ Register NewReg = MRI.createVirtualRegister ({&RB, Ty});
96
+ DefOP.setReg (NewReg);
97
+
98
+ auto &MBB = *MI.getParent ();
99
+ B.setInsertPt (MBB, MI.isPHI () ? MBB.getFirstNonPHI ()
100
+ : std::next (MI.getIterator ()));
101
+ B.buildCopy (Reg, NewReg);
102
+
103
+ // The problem was discoverd for uniform S1 that was used as both
104
+ // lane mask(vcc) and regular sgpr S1.
105
+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
106
+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
107
+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
108
+ // - the regular regular sgpr S1(uniform) instruction is now broken since
109
+ // it uses sreg_64_xexec(S1) which is divergent.
110
+
111
+ // "Clear" reg classes from uses on generic instructions and but register
112
+ // banks instead.
113
+ for (auto &UseMI : MRI.use_instructions (Reg)) {
114
+ if (shouldRBSelect (UseMI)) {
115
+ for (MachineOperand &Op : UseMI.operands ()) {
116
+ if (Op.isReg () && Op.isUse () && Op.getReg () == Reg)
117
+ Op.setReg (NewReg);
118
+ }
119
+ }
120
+ }
121
+
122
+ } else {
123
+ MRI.setRegBank (Reg, RB);
124
+ }
125
+ }
126
+
127
+ void setRBUse (MachineInstr &MI, MachineOperand &UseOP, MachineIRBuilder B,
128
+ MachineRegisterInfo &MRI, const RegisterBank &RB) {
129
+ Register Reg = UseOP.getReg ();
130
+
131
+ LLT Ty = MRI.getType (Reg);
132
+ Register NewReg = MRI.createVirtualRegister ({&RB, Ty});
133
+ UseOP.setReg (NewReg);
134
+
135
+ if (MI.isPHI ()) {
136
+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
137
+ MachineBasicBlock *DefMBB = DefMI->getParent ();
138
+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
139
+ } else {
140
+ B.setInstr (MI);
141
+ }
142
+
143
+ B.buildCopy (NewReg, Reg);
144
+ }
145
+
146
+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
147
+ // the cycle
148
+ // Note: uniformity analysis does not consider that registers with vgpr def are
149
+ // divergent (you can have uniform value in vgpr).
150
+ // - TODO: implicit use of $exec could be implemented as indicator that
151
+ // instruction is divergent
152
+ bool isTemporalDivergenceCopy (Register Reg, MachineRegisterInfo &MRI) {
153
+ MachineInstr *MI = MRI.getVRegDef (Reg);
154
+ if (MI->getOpcode () == AMDGPU::COPY) {
155
+ for (auto Op : MI->implicit_operands ()) {
156
+ if (!Op.isReg ())
157
+ continue ;
158
+ Register Reg = Op.getReg ();
159
+ if (Reg == AMDGPU::EXEC) {
160
+ return true ;
161
+ }
162
+ }
163
+ }
164
+
165
+ return false ;
166
+ }
167
+
168
+ Register getVReg (MachineOperand &Op) {
169
+ if (!Op.isReg ())
170
+ return 0 ;
171
+
172
+ Register Reg = Op.getReg ();
173
+ if (!Reg.isVirtual ())
174
+ return 0 ;
175
+
176
+ return Reg;
177
+ }
178
+
179
+ bool AMDGPURBSelect::runOnMachineFunction (MachineFunction &MF) {
180
+ MachineUniformityInfo &MUI =
181
+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
182
+ AMDGPU::IntrinsicLaneMaskAnalyzer ILMA (MF);
183
+ MachineRegisterInfo &MRI = MF.getRegInfo ();
184
+ const RegisterBankInfo &RBI = *MF.getSubtarget ().getRegBankInfo ();
185
+
186
+ MachineIRBuilder B (MF);
187
+
188
+ // Assign register banks to ALL def registers on G_ instructions.
189
+ // Same for copies if they have no register bank or class on def.
190
+ for (MachineBasicBlock &MBB : MF) {
191
+ for (MachineInstr &MI : MBB) {
192
+ if (!shouldRBSelect (MI))
193
+ continue ;
194
+
195
+ for (MachineOperand &DefOP : MI.defs ()) {
196
+ Register DefReg = getVReg (DefOP);
197
+ if (!DefReg)
198
+ continue ;
199
+
200
+ // Copies can have register class on def registers.
201
+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
202
+ continue ;
203
+ }
204
+
205
+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
206
+ setRB (MI, DefOP, B, MRI, RBI.getRegBank (AMDGPU::SGPRRegBankID));
207
+ } else {
208
+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
209
+ setRB (MI, DefOP, B, MRI, RBI.getRegBank (AMDGPU::VCCRegBankID));
210
+ else
211
+ setRB (MI, DefOP, B, MRI, RBI.getRegBank (AMDGPU::VGPRRegBankID));
212
+ }
213
+ }
214
+ }
215
+ }
216
+
217
+ // At this point all virtual registers have register class or bank
218
+ // - Defs of G_ instructions have register banks.
219
+ // - Defs and uses of inst-selected instructions have register class.
220
+ // - Defs and uses of copies can have either register class or bank
221
+ // and most notably
222
+ // - Uses of G_ instructions can have either register class or bank
223
+
224
+ // Reassign uses of G_ instructions to only have register banks.
225
+ for (MachineBasicBlock &MBB : MF) {
226
+ for (MachineInstr &MI : MBB) {
227
+ if (!shouldRBSelect (MI))
228
+ continue ;
229
+
230
+ // Copies can have register class on use registers.
231
+ if (MI.isCopy ())
232
+ continue ;
233
+
234
+ for (MachineOperand &UseOP : MI.uses ()) {
235
+ Register UseReg = getVReg (UseOP);
236
+ if (!UseReg)
237
+ continue ;
238
+
239
+ if (!MRI.getRegClassOrNull (UseReg))
240
+ continue ;
241
+
242
+ if (!isTemporalDivergenceCopy (UseReg, MRI) &&
243
+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
244
+ setRBUse (MI, UseOP, B, MRI, RBI.getRegBank (AMDGPU::SGPRRegBankID));
245
+ } else {
246
+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
247
+ setRBUse (MI, UseOP, B, MRI, RBI.getRegBank (AMDGPU::VCCRegBankID));
248
+ else
249
+ setRBUse (MI, UseOP, B, MRI, RBI.getRegBank (AMDGPU::VGPRRegBankID));
250
+ }
251
+ }
252
+ }
253
+ }
254
+
255
+ // Defs and uses of G_ instructions have register banks exclusively.
256
+
257
+ return true ;
258
+ }
0 commit comments