Skip to content

Commit ebc5139

Browse files
AMDGPU/GlobalISel: Update divergence lowering tests
In preparation for implementing temporal divergence lowering for GlobalISel, switch the LLVM IR tests for AMDGPU divergence lowering to the new reg bank select. This requires adding a few simple regbanklegalize rules for these tests to work.
1 parent 42ccf03 commit ebc5139

9 files changed

+683
-622
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,12 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
312312
}
313313

314314
// Opcodes that also support S1.
315+
if (Opc == G_FREEZE &&
316+
MRI.getType(MI->getOperand(0).getReg()) != LLT::scalar(1)) {
317+
RBLHelper.applyMappingTrivial(*MI);
318+
continue;
319+
}
320+
315321
if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
316322
Opc == AMDGPU::G_IMPLICIT_DEF)) {
317323
Register Dst = MI->getOperand(0).getReg();

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,26 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
134134
switch (Mapping.LoweringMethod) {
135135
case DoNotLower:
136136
return;
137+
case VccExtToSel: {
138+
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
139+
Register Src = MI.getOperand(1).getReg();
140+
unsigned Opc = MI.getOpcode();
141+
if (Ty == S32 || Ty == S16) {
142+
auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1);
143+
auto False = B.buildConstant({VgprRB, Ty}, 0);
144+
B.buildSelect(MI.getOperand(0).getReg(), Src, True, False);
145+
}
146+
if (Ty == S64) {
147+
auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1);
148+
auto False = B.buildConstant({VgprRB, S32}, 0);
149+
auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False);
150+
B.buildMergeValues(
151+
MI.getOperand(0).getReg(),
152+
{Sel.getReg(0), Opc == G_SEXT ? Sel.getReg(0) : False.getReg(0)});
153+
}
154+
MI.eraseFromParent();
155+
return;
156+
}
137157
case UniExtToSel: {
138158
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
139159
auto True = B.buildConstant({SgprRB, Ty},
@@ -276,6 +296,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
276296
case Sgpr64:
277297
case Vgpr64:
278298
return LLT::scalar(64);
299+
case VgprP0:
300+
return LLT::pointer(0, 64);
279301
case SgprP1:
280302
case VgprP1:
281303
return LLT::pointer(1, 64);
@@ -383,6 +405,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
383405
return SgprRB;
384406
case Vgpr32:
385407
case Vgpr64:
408+
case VgprP0:
386409
case VgprP1:
387410
case VgprP3:
388411
case VgprP4:
@@ -425,6 +448,7 @@ void RegBankLegalizeHelper::applyMappingDst(
425448
case SgprV4S32:
426449
case Vgpr32:
427450
case Vgpr64:
451+
case VgprP0:
428452
case VgprP1:
429453
case VgprP3:
430454
case VgprP4:
@@ -555,6 +579,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
555579
// vgpr scalars, pointers and vectors
556580
case Vgpr32:
557581
case Vgpr64:
582+
case VgprP0:
558583
case VgprP1:
559584
case VgprP3:
560585
case VgprP4:
@@ -653,7 +678,8 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
653678
// We accept all types that can fit in some register class.
654679
// Uniform G_PHIs have all sgpr registers.
655680
// Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
656-
if (Ty == LLT::scalar(32) || Ty == LLT::pointer(4, 64)) {
681+
if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
682+
Ty == LLT::pointer(4, 64)) {
657683
return;
658684
}
659685

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5050
return MRI.getType(Reg) == LLT::scalar(32);
5151
case S64:
5252
return MRI.getType(Reg) == LLT::scalar(64);
53+
case P0:
54+
return MRI.getType(Reg) == LLT::pointer(0, 64);
5355
case P1:
5456
return MRI.getType(Reg) == LLT::pointer(1, 64);
5557
case P3:
@@ -58,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5860
return MRI.getType(Reg) == LLT::pointer(4, 64);
5961
case P5:
6062
return MRI.getType(Reg) == LLT::pointer(5, 32);
63+
case V4S32:
64+
return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
6165
case B32:
6266
return MRI.getType(Reg).getSizeInBits() == 32;
6367
case B64:
@@ -78,6 +82,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
7882
return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
7983
case UniS64:
8084
return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
85+
case UniP0:
86+
return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
8187
case UniP1:
8288
return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
8389
case UniP3:
@@ -104,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
104110
return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
105111
case DivS64:
106112
return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
113+
case DivP0:
114+
return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
107115
case DivP1:
108116
return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
109117
case DivP3:
@@ -431,16 +439,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
431439
addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
432440
.Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
433441
.Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
442+
.Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
443+
.Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
434444
.Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
435445

436446
addRulesForGOpcs({G_SHL}, Standard)
447+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
437448
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
438449
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
439450

440451
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
441452
// and G_FREEZE here, rest is trivially regbankselected earlier
453+
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
442454
addRulesForGOpcs({G_CONSTANT})
443455
.Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
456+
addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
444457

445458
addRulesForGOpcs({G_ICMP})
446459
.Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
@@ -471,6 +484,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
471484

472485
addRulesForGOpcs({G_ZEXT, G_SEXT})
473486
.Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
487+
.Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
474488
.Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
475489
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}});
476490

@@ -525,9 +539,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
525539

526540
// clang-format off
527541
addRulesForGOpcs({G_LOAD})
542+
.Any({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})
543+
528544
.Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
529545
.Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
530546
.Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
547+
.Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
531548
.Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
532549
.Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
533550

@@ -556,15 +573,26 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
556573
// clang-format on
557574

558575
addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector)
576+
.Div(S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
577+
.Uni(S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
559578
.Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
560579
.Uni(V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});
561580

562581
addRulesForGOpcs({G_STORE})
582+
.Any({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
563583
.Any({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
564584
.Any({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
565585
.Any({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});
566586

567-
addRulesForGOpcs({G_PTR_ADD}).Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}});
587+
addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
588+
.Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
589+
590+
addRulesForGOpcs({G_PTR_ADD})
591+
.Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
592+
.Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
593+
.Any({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});
594+
595+
addRulesForGOpcs({G_INTTOPTR}).Any({{UniP4}, {{SgprP4}, {Sgpr64}}});
568596

569597
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
570598

@@ -580,15 +608,24 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
580608
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
581609

582610
addRulesForGOpcs({G_UITOFP})
611+
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
583612
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
584613
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
585614

586615
using namespace Intrinsic;
587616

617+
addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
618+
588619
// This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir.
589620
addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});
590621

591622
addRulesForIOpcs({amdgcn_if_break}, Standard)
592623
.Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
593624

625+
addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
626+
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
627+
628+
addRulesForIOpcs({amdgcn_readfirstlane})
629+
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
630+
594631
} // end initialize rules

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,19 @@ enum UniformityLLTOpPredicateID {
5050
DivS64,
5151

5252
// pointers
53+
P0,
5354
P1,
5455
P3,
5556
P4,
5657
P5,
5758

59+
UniP0,
5860
UniP1,
5961
UniP3,
6062
UniP4,
6163
UniP5,
6264

65+
DivP0,
6366
DivP1,
6467
DivP3,
6568
DivP4,
@@ -124,6 +127,7 @@ enum RegBankLLTMappingApplyID {
124127
// vgpr scalars, pointers, vectors and B-types
125128
Vgpr32,
126129
Vgpr64,
130+
VgprP0,
127131
VgprP1,
128132
VgprP3,
129133
VgprP4,
@@ -162,6 +166,7 @@ enum RegBankLLTMappingApplyID {
162166
// vgpr. Lower it to two S32 vgpr ANDs.
163167
enum LoweringMethodID {
164168
DoNotLower,
169+
VccExtToSel,
165170
UniExtToSel,
166171
VgprToVccCopy,
167172
SplitTo32,

0 commit comments

Comments
 (0)