Skip to content

Commit 8e33358

Browse files
AMDGPU/GlobalISel: Update divergence lowering tests
In preparation for implementing temporal divergence lowering for global-isel, switch the llvm-ir tests for amdgpu divergence lowering to the new reg bank select. This requires adding a few simple regbanklegalize rules for these tests to work.
1 parent 68703a0 commit 8e33358

10 files changed

+1182
-328
lines changed

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,12 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
311311
}
312312

313313
// Opcodes that also support S1.
314+
if (Opc == G_FREEZE &&
315+
MRI.getType(MI->getOperand(0).getReg()) != LLT::scalar(1)) {
316+
RBLHelper.applyMappingTrivial(*MI);
317+
continue;
318+
}
319+
314320
if ((Opc == AMDGPU::G_CONSTANT || Opc == AMDGPU::G_FCONSTANT ||
315321
Opc == AMDGPU::G_IMPLICIT_DEF)) {
316322
Register Dst = MI->getOperand(0).getReg();

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,26 @@ void RegBankLegalizeHelper::lower(MachineInstr &MI,
134134
switch (Mapping.LoweringMethod) {
135135
case DoNotLower:
136136
return;
137+
case VccExtToSel: {
138+
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
139+
Register Src = MI.getOperand(1).getReg();
140+
unsigned Opc = MI.getOpcode();
141+
if (Ty == S32 || Ty == S16) {
142+
auto True = B.buildConstant({VgprRB, Ty}, Opc == G_SEXT ? -1 : 1);
143+
auto False = B.buildConstant({VgprRB, Ty}, 0);
144+
B.buildSelect(MI.getOperand(0).getReg(), Src, True, False);
145+
}
146+
if (Ty == S64) {
147+
auto True = B.buildConstant({VgprRB, S32}, Opc == G_SEXT ? -1 : 1);
148+
auto False = B.buildConstant({VgprRB, S32}, 0);
149+
auto Sel = B.buildSelect({VgprRB, S32}, Src, True, False);
150+
B.buildMergeValues(
151+
MI.getOperand(0).getReg(),
152+
{Sel.getReg(0), Opc == G_SEXT ? Sel.getReg(0) : False.getReg(0)});
153+
}
154+
MI.eraseFromParent();
155+
return;
156+
}
137157
case UniExtToSel: {
138158
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
139159
auto True = B.buildConstant({SgprRB, Ty},
@@ -276,6 +296,8 @@ LLT RegBankLegalizeHelper::getTyFromID(RegBankLLTMappingApplyID ID) {
276296
case Sgpr64:
277297
case Vgpr64:
278298
return LLT::scalar(64);
299+
case VgprP0:
300+
return LLT::pointer(0, 64);
279301
case SgprP1:
280302
case VgprP1:
281303
return LLT::pointer(1, 64);
@@ -383,6 +405,7 @@ RegBankLegalizeHelper::getRegBankFromID(RegBankLLTMappingApplyID ID) {
383405
return SgprRB;
384406
case Vgpr32:
385407
case Vgpr64:
408+
case VgprP0:
386409
case VgprP1:
387410
case VgprP3:
388411
case VgprP4:
@@ -425,6 +448,7 @@ void RegBankLegalizeHelper::applyMappingDst(
425448
case SgprV4S32:
426449
case Vgpr32:
427450
case Vgpr64:
451+
case VgprP0:
428452
case VgprP1:
429453
case VgprP3:
430454
case VgprP4:
@@ -555,6 +579,7 @@ void RegBankLegalizeHelper::applyMappingSrc(
555579
// vgpr scalars, pointers and vectors
556580
case Vgpr32:
557581
case Vgpr64:
582+
case VgprP0:
558583
case VgprP1:
559584
case VgprP3:
560585
case VgprP4:
@@ -653,7 +678,8 @@ void RegBankLegalizeHelper::applyMappingPHI(MachineInstr &MI) {
653678
// We accept all types that can fit in some register class.
654679
// Uniform G_PHIs have all sgpr registers.
655680
// Divergent G_PHIs have vgpr dst but inputs can be sgpr or vgpr.
656-
if (Ty == LLT::scalar(32) || Ty == LLT::pointer(4, 64)) {
681+
if (Ty == LLT::scalar(32) || Ty == LLT::pointer(1, 64) ||
682+
Ty == LLT::pointer(4, 64)) {
657683
return;
658684
}
659685

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5050
return MRI.getType(Reg) == LLT::scalar(32);
5151
case S64:
5252
return MRI.getType(Reg) == LLT::scalar(64);
53+
case P0:
54+
return MRI.getType(Reg) == LLT::pointer(0, 64);
5355
case P1:
5456
return MRI.getType(Reg) == LLT::pointer(1, 64);
5557
case P3:
@@ -58,6 +60,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
5860
return MRI.getType(Reg) == LLT::pointer(4, 64);
5961
case P5:
6062
return MRI.getType(Reg) == LLT::pointer(5, 32);
63+
case V4S32:
64+
return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
6165
case B32:
6266
return MRI.getType(Reg).getSizeInBits() == 32;
6367
case B64:
@@ -78,6 +82,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
7882
return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
7983
case UniS64:
8084
return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
85+
case UniP0:
86+
return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
8187
case UniP1:
8288
return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
8389
case UniP3:
@@ -104,6 +110,8 @@ bool matchUniformityAndLLT(Register Reg, UniformityLLTOpPredicateID UniID,
104110
return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
105111
case DivS64:
106112
return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
113+
case DivP0:
114+
return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
107115
case DivP1:
108116
return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
109117
case DivP3:
@@ -433,16 +441,21 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
433441
addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
434442
.Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
435443
.Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
444+
.Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
445+
.Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
436446
.Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});
437447

438448
addRulesForGOpcs({G_SHL}, Standard)
449+
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
439450
.Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
440451
.Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});
441452

442453
// Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT, G_FCONSTANT
443454
// and G_FREEZE here, rest is trivially regbankselected earlier
455+
addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
444456
addRulesForGOpcs({G_CONSTANT})
445457
.Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});
458+
addRulesForGOpcs({G_FREEZE}).Any({{DivS1}, {{Vcc}, {Vcc}}});
446459

447460
addRulesForGOpcs({G_ICMP})
448461
.Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
@@ -473,6 +486,7 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
473486

474487
addRulesForGOpcs({G_ZEXT, G_SEXT})
475488
.Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
489+
.Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
476490
.Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
477491
.Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}});
478492

@@ -527,9 +541,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
527541

528542
// clang-format off
529543
addRulesForGOpcs({G_LOAD})
544+
.Any({{DivB32, DivP0}, {{VgprB32}, {VgprP0}}})
545+
530546
.Any({{DivB32, DivP1}, {{VgprB32}, {VgprP1}}})
531547
.Any({{{UniB256, UniP1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
532548
.Any({{{UniB512, UniP1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})
549+
.Any({{{UniB32, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}})
533550
.Any({{{UniB256, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {VgprP1}, SplitLoad}})
534551
.Any({{{UniB512, UniP1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {VgprP1}, SplitLoad}})
535552

@@ -558,15 +575,26 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
558575
// clang-format on
559576

560577
addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD}, Vector)
578+
.Div(S32, {{Vgpr32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
579+
.Uni(S32, {{UniInVgprS32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
561580
.Div(V4S32, {{VgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}})
562581
.Uni(V4S32, {{UniInVgprV4S32}, {SgprV4S32, Vgpr32, Vgpr32, Sgpr32}});
563582

564583
addRulesForGOpcs({G_STORE})
584+
.Any({{S32, P0}, {{}, {Vgpr32, VgprP0}}})
565585
.Any({{S32, P1}, {{}, {Vgpr32, VgprP1}}})
566586
.Any({{S64, P1}, {{}, {Vgpr64, VgprP1}}})
567587
.Any({{V4S32, P1}, {{}, {VgprV4S32, VgprP1}}});
568588

569-
addRulesForGOpcs({G_PTR_ADD}).Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}});
589+
addRulesForGOpcs({G_AMDGPU_BUFFER_STORE})
590+
.Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
591+
592+
addRulesForGOpcs({G_PTR_ADD})
593+
.Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
594+
.Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
595+
.Any({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}});
596+
597+
addRulesForGOpcs({G_INTTOPTR}).Any({{UniP4}, {{SgprP4}, {Sgpr64}}});
570598

571599
addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
572600

@@ -582,15 +610,24 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
582610
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
583611

584612
addRulesForGOpcs({G_UITOFP})
613+
.Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
585614
.Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
586615
.Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat);
587616

588617
using namespace Intrinsic;
589618

619+
addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
620+
590621
// This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir.
591622
addRulesForIOpcs({amdgcn_end_cf}).Any({{_, S32}, {{}, {None, Sgpr32}}});
592623

593624
addRulesForIOpcs({amdgcn_if_break}, Standard)
594625
.Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
595626

627+
addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
628+
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
629+
630+
addRulesForIOpcs({amdgcn_readfirstlane})
631+
.Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}});
632+
596633
} // end initialize rules

llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,19 @@ enum UniformityLLTOpPredicateID {
5050
DivS64,
5151

5252
// pointers
53+
P0,
5354
P1,
5455
P3,
5556
P4,
5657
P5,
5758

59+
UniP0,
5860
UniP1,
5961
UniP3,
6062
UniP4,
6163
UniP5,
6264

65+
DivP0,
6366
DivP1,
6467
DivP3,
6568
DivP4,
@@ -124,6 +127,7 @@ enum RegBankLLTMappingApplyID {
124127
// vgpr scalars, pointers, vectors and B-types
125128
Vgpr32,
126129
Vgpr64,
130+
VgprP0,
127131
VgprP1,
128132
VgprP3,
129133
VgprP4,
@@ -162,6 +166,7 @@ enum RegBankLLTMappingApplyID {
162166
// vgpr. Lower it to two S32 vgpr ANDs.
163167
enum LoweringMethodID {
164168
DoNotLower,
169+
VccExtToSel,
165170
UniExtToSel,
166171
VgprToVccCopy,
167172
SplitTo32,

0 commit comments

Comments
 (0)