Skip to content

Commit 09ed0e4

Browse files
committed
AMDGPU/GlobalISel: Select llvm.amdgcn.raw.tbuffer.load
1 parent 459cf6e commit 09ed0e4

11 files changed

+663
-42
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

+5
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,8 @@ def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SSHORT, SIbuffer_load_short>;
137137
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_SBYTE, SIbuffer_load_byte>;
138138
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT, SIbuffer_load_format>;
139139
def : GINodeEquiv<G_AMDGPU_BUFFER_LOAD_FORMAT_D16, SIbuffer_load_format_d16>;
140+
def : GINodeEquiv<G_AMDGPU_TBUFFER_LOAD_FORMAT, SItbuffer_load>;
141+
def : GINodeEquiv<G_AMDGPU_TBUFFER_LOAD_FORMAT_D16, SItbuffer_load_d16>;
140142

141143
// FIXME: Check MMO is atomic
142144
def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
@@ -234,6 +236,9 @@ def gi_as_i32timm : GICustomOperandRenderer<"renderTruncTImm32">,
234236
def gi_as_i16timm : GICustomOperandRenderer<"renderTruncTImm16">,
235237
GISDNodeXFormEquiv<as_i16timm>;
236238

239+
// GlobalISel counterpart of the as_i8timm SelectionDAG transform: imported
// patterns that use as_i8timm render the operand via renderTruncTImm8.
def gi_as_i8timm : GICustomOperandRenderer<"renderTruncTImm8">,
240+
GISDNodeXFormEquiv<as_i8timm>;
241+
237242
def gi_as_i1timm : GICustomOperandRenderer<"renderTruncTImm1">,
238243
GISDNodeXFormEquiv<as_i1timm>;
239244

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

+3-15
Original file line numberDiff line numberDiff line change
@@ -2885,21 +2885,9 @@ void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
28852885

28862886
/// This only really exists to satisfy DAG type checking machinery, so is a
28872887
/// no-op here.
2888-
void AMDGPUInstructionSelector::renderTruncTImm32(MachineInstrBuilder &MIB,
2889-
const MachineInstr &MI,
2890-
int OpIdx) const {
2891-
MIB.addImm(MI.getOperand(OpIdx).getImm());
2892-
}
2893-
2894-
void AMDGPUInstructionSelector::renderTruncTImm16(MachineInstrBuilder &MIB,
2895-
const MachineInstr &MI,
2896-
int OpIdx) const {
2897-
MIB.addImm(MI.getOperand(OpIdx).getImm());
2898-
}
2899-
2900-
void AMDGPUInstructionSelector::renderTruncTImm1(MachineInstrBuilder &MIB,
2901-
const MachineInstr &MI,
2902-
int OpIdx) const {
2888+
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
2889+
const MachineInstr &MI,
2890+
int OpIdx) const {
29032891
// Copy the immediate held in operand OpIdx of MI onto MIB unchanged; no
// truncation is performed here.
MIB.addImm(MI.getOperand(OpIdx).getImm());
29042892
}
29052893

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

+21-5
Original file line numberDiff line numberDiff line change
@@ -218,12 +218,28 @@ class AMDGPUInstructionSelector : public InstructionSelector {
218218
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
219219
int OpIdx = -1) const;
220220

221-
void renderTruncTImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
222-
int OpIdx) const;
223-
void renderTruncTImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
224-
int OpIdx) const;
221+
void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
222+
int OpIdx) const;
223+
225224
// Renderer named by gi_as_i1timm; simply forwards to renderTruncTImm.
void renderTruncTImm1(MachineInstrBuilder &MIB, const MachineInstr &MI,
226-
int OpIdx) const;
225+
int OpIdx) const {
226+
renderTruncTImm(MIB, MI, OpIdx);
227+
}
228+
229+
// Renderer named by gi_as_i8timm; simply forwards to renderTruncTImm.
void renderTruncTImm8(MachineInstrBuilder &MIB, const MachineInstr &MI,
230+
int OpIdx) const {
231+
renderTruncTImm(MIB, MI, OpIdx);
232+
}
233+
234+
// Renderer named by gi_as_i16timm; simply forwards to renderTruncTImm.
void renderTruncTImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
235+
int OpIdx) const {
236+
renderTruncTImm(MIB, MI, OpIdx);
237+
}
238+
239+
// Renderer named by gi_as_i32timm; simply forwards to renderTruncTImm.
void renderTruncTImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
240+
int OpIdx) const {
241+
renderTruncTImm(MIB, MI, OpIdx);
242+
}
227243

228244
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
229245
int OpIdx) const;

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+31-10
Original file line numberDiff line numberDiff line change
@@ -2438,7 +2438,8 @@ bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
24382438
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
24392439
MachineRegisterInfo &MRI,
24402440
MachineIRBuilder &B,
2441-
bool IsFormat) const {
2441+
bool IsFormat,
2442+
bool IsTyped) const {
24422443
B.setInstr(MI);
24432444

24442445
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
@@ -2449,8 +2450,11 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
24492450
Register Dst = MI.getOperand(0).getReg();
24502451
Register RSrc = MI.getOperand(2).getReg();
24512452

2453+
// The typed intrinsics add an immediate after the registers.
2454+
const unsigned NumVIndexOps = IsTyped ? 8 : 7;
2455+
24522456
// The struct intrinsic variants add one additional operand over raw.
2453-
const bool HasVIndex = MI.getNumOperands() == 7;
2457+
const bool HasVIndex = MI.getNumOperands() == NumVIndexOps;
24542458
Register VIndex;
24552459
int OpOffset = 0;
24562460
if (HasVIndex) {
@@ -2460,6 +2464,13 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
24602464

24612465
Register VOffset = MI.getOperand(3 + OpOffset).getReg();
24622466
Register SOffset = MI.getOperand(4 + OpOffset).getReg();
2467+
2468+
unsigned Format = 0;
2469+
if (IsTyped) {
2470+
Format = MI.getOperand(5 + OpOffset).getImm();
2471+
++OpOffset;
2472+
}
2473+
24632474
unsigned AuxiliaryData = MI.getOperand(5 + OpOffset).getImm();
24642475
unsigned ImmOffset;
24652476
unsigned TotalOffset;
@@ -2474,7 +2485,11 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
24742485
MMO = B.getMF().getMachineMemOperand(MMO, TotalOffset, MemSize);
24752486

24762487
unsigned Opc;
2477-
if (IsFormat) {
2488+
2489+
if (IsTyped) {
2490+
Opc = IsD16 ? AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16 :
2491+
AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT;
2492+
} else if (IsFormat) {
24782493
Opc = IsD16 ? AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16 :
24792494
AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT;
24802495
} else {
@@ -2506,16 +2521,20 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
25062521
if (!VIndex)
25072522
VIndex = B.buildConstant(S32, 0).getReg(0);
25082523

2509-
B.buildInstr(Opc)
2524+
auto MIB = B.buildInstr(Opc)
25102525
.addDef(LoadDstReg) // vdata
25112526
.addUse(RSrc) // rsrc
25122527
.addUse(VIndex) // vindex
25132528
.addUse(VOffset) // voffset
25142529
.addUse(SOffset) // soffset
2515-
.addImm(ImmOffset) // offset(imm)
2516-
.addImm(AuxiliaryData) // cachepolicy, swizzled buffer(imm)
2517-
.addImm(HasVIndex ? -1 : 0) // idxen(imm)
2518-
.addMemOperand(MMO);
2530+
.addImm(ImmOffset); // offset(imm)
2531+
2532+
if (IsTyped)
2533+
MIB.addImm(Format);
2534+
2535+
MIB.addImm(AuxiliaryData) // cachepolicy, swizzled buffer(imm)
2536+
.addImm(HasVIndex ? -1 : 0) // idxen(imm)
2537+
.addMemOperand(MMO);
25192538

25202539
if (LoadDstReg != Dst) {
25212540
B.setInsertPt(B.getMBB(), ++B.getInsertPt());
@@ -2674,10 +2693,12 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
26742693
return legalizeRawBufferStore(MI, MRI, B, true);
26752694
case Intrinsic::amdgcn_raw_buffer_load:
26762695
case Intrinsic::amdgcn_struct_buffer_load:
2677-
return legalizeBufferLoad(MI, MRI, B, false);
2696+
return legalizeBufferLoad(MI, MRI, B, false, false);
26782697
case Intrinsic::amdgcn_raw_buffer_load_format:
26792698
case Intrinsic::amdgcn_struct_buffer_load_format:
2680-
return legalizeBufferLoad(MI, MRI, B, true);
2699+
return legalizeBufferLoad(MI, MRI, B, true, false);
2700+
case Intrinsic::amdgcn_raw_tbuffer_load:
2701+
return legalizeBufferLoad(MI, MRI, B, true, true);
26812702
case Intrinsic::amdgcn_atomic_inc:
26822703
return legalizeAtomicIncDec(MI, B, true);
26832704
case Intrinsic::amdgcn_atomic_dec:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
115115
bool legalizeRawBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
116116
MachineIRBuilder &B, bool IsFormat) const;
117117
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
118-
MachineIRBuilder &B, bool IsFormat) const;
118+
MachineIRBuilder &B, bool IsFormat,
119+
bool IsTyped) const;
119120

120121
bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
121122
bool IsInc) const;

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -2249,7 +2249,9 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
22492249
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
22502250
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
22512251
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
2252-
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16: {
2252+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
2253+
case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
2254+
case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16: {
22532255
applyDefaultMapping(OpdMapper);
22542256
executeInWaterfallLoop(MI, MRI, {1, 4});
22552257
return;
@@ -3073,7 +3075,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
30733075
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
30743076
case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
30753077
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3076-
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16: {
3078+
case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3079+
case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3080+
case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16: {
30773081
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
30783082

30793083
// rsrc
@@ -3087,6 +3091,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
30873091

30883092
// soffset
30893093
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
3094+
3095+
// Any remaining operands are immediates and were correctly null
3096+
// initialized.
30903097
break;
30913098
}
30923099
case AMDGPU::G_INTRINSIC: {

llvm/lib/Target/AMDGPU/BUFInstructions.td

+11-9
Original file line numberDiff line numberDiff line change
@@ -1694,26 +1694,26 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
16941694
def : GCNPat<
16951695
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
16961696
timm:$format, timm:$auxiliary, 0)),
1697-
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
1698-
(as_i8imm $format),
1697+
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
1698+
(as_i8timm $format),
16991699
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
17001700
(extract_swz $auxiliary))
17011701
>;
17021702

17031703
def : GCNPat<
17041704
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
17051705
timm:$format, timm:$auxiliary, timm)),
1706-
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
1707-
(as_i8imm $format),
1706+
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
1707+
(as_i8timm $format),
17081708
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
17091709
(extract_swz $auxiliary))
17101710
>;
17111711

17121712
def : GCNPat<
17131713
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
17141714
timm:$format, timm:$auxiliary, 0)),
1715-
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
1716-
(as_i8imm $format),
1715+
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
1716+
(as_i8timm $format),
17171717
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
17181718
(extract_swz $auxiliary))
17191719
>;
@@ -1722,9 +1722,9 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
17221722
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
17231723
timm:$format, timm:$auxiliary, timm)),
17241724
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
1725-
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
1726-
$rsrc, $soffset, (as_i16imm $offset),
1727-
(as_i8imm $format),
1725+
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
1726+
SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
1727+
(as_i8timm $format),
17281728
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
17291729
(extract_swz $auxiliary))
17301730
>;
@@ -1741,12 +1741,14 @@ defm : MTBUF_LoadIntrinsicPat<SItbuffer_load, v4f32, "TBUFFER_LOAD_FORMAT_XYZW">
17411741

17421742
let SubtargetPredicate = HasUnpackedD16VMem in {
17431743
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
1744+
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X_gfx80">;
17441745
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XY_gfx80">;
17451746
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4i32, "TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80">;
17461747
} // End HasUnpackedD16VMem.
17471748

17481749
let SubtargetPredicate = HasPackedD16VMem in {
17491750
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
1751+
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_X">;
17501752
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
17511753
defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
17521754
} // End HasPackedD16VMem.

llvm/lib/Target/AMDGPU/SIInstrInfo.td

+4
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,10 @@ def as_i8imm : SDNodeXForm<imm, [{
677677
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
678678
}]>;
679679

680+
// Re-emit a target immediate (timm) as an i8 target constant. Mirrors
// as_i8imm: the value is zero-extended and typed MVT::i8, rather than the
// sign-extended MVT::i16 form used by the 16-bit transforms.
def as_i8timm : SDNodeXForm<timm, [{
681+
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i8);
682+
}]>;
683+
680684
def as_i16imm : SDNodeXForm<imm, [{
681685
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i16);
682686
}]>;

llvm/lib/Target/AMDGPU/SIInstructions.td

+12
Original file line numberDiff line numberDiff line change
@@ -2155,13 +2155,25 @@ class BufferLoadGenericInstruction : AMDGPUGenericInstruction {
21552155
let mayLoad = 1;
21562156
}
21572157

2158+
// Operand layout shared by the G_AMDGPU_TBUFFER_LOAD_FORMAT* generic
// instructions: one result ($dst), four register inputs ($rsrc, $vindex,
// $voffset, $soffset) and four immediates ($offset, $format, $cachepolicy,
// $idxen). Marked as a load with no other side effects.
class TBufferLoadGenericInstruction : AMDGPUGenericInstruction {
2159+
let OutOperandList = (outs type0:$dst);
2160+
let InOperandList = (ins type1:$rsrc, type2:$vindex, type2:$voffset,
2161+
type2:$soffset, untyped_imm_0:$offset, untyped_imm_0:$format,
2162+
untyped_imm_0:$cachepolicy, untyped_imm_0:$idxen);
2163+
let hasSideEffects = 0;
2164+
let mayLoad = 1;
2165+
}
2166+
21582167
def G_AMDGPU_BUFFER_LOAD_UBYTE : BufferLoadGenericInstruction;
21592168
def G_AMDGPU_BUFFER_LOAD_SBYTE : BufferLoadGenericInstruction;
21602169
def G_AMDGPU_BUFFER_LOAD_USHORT : BufferLoadGenericInstruction;
21612170
def G_AMDGPU_BUFFER_LOAD_SSHORT : BufferLoadGenericInstruction;
21622171
def G_AMDGPU_BUFFER_LOAD : BufferLoadGenericInstruction;
21632172
def G_AMDGPU_BUFFER_LOAD_FORMAT : BufferLoadGenericInstruction;
21642173
def G_AMDGPU_BUFFER_LOAD_FORMAT_D16 : BufferLoadGenericInstruction;
2174+
def G_AMDGPU_TBUFFER_LOAD_FORMAT : TBufferLoadGenericInstruction;
2175+
def G_AMDGPU_TBUFFER_LOAD_FORMAT_D16 : TBufferLoadGenericInstruction;
2176+
21652177

21662178
// Atomic cmpxchg. $cmpval and $newval are packed in a single vector
21672179
// operand. Expects a MachineMemOperand in addition to explicit

0 commit comments

Comments
 (0)