Skip to content

Commit d116e84

Browse files
committed
AMDGPU/GFX10: Support DLC bit in llvm.amdgcn.s.buffer.load intrinsic
Summary:
Change-Id: Ie4c971462a7749740938c687144e77441dac2539
Reviewers: rampitec, arsenm
Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62486
Change-Id: Iae59523edd75c74918d2118df6571a7b671717a0
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363514 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent c6fce12 commit d116e84

File tree

7 files changed

+103
-56
lines changed

7 files changed

+103
-56
lines changed

include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -864,7 +864,7 @@ def int_amdgcn_s_buffer_load : Intrinsic <
864864
[llvm_any_ty],
865865
[llvm_v4i32_ty, // rsrc(SGPR)
866866
llvm_i32_ty, // byte offset(SGPR/imm)
867-
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc)
867+
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 2 = dlc)
868868
[IntrNoMem, ImmArg<2>]>,
869869
AMDGPURsrcIntrinsic<0>;
870870

lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5221,7 +5221,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
52215221
}
52225222

52235223
SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
5224-
SDValue Offset, SDValue GLC,
5224+
SDValue Offset, SDValue GLC, SDValue DLC,
52255225
SelectionDAG &DAG) const {
52265226
MachineFunction &MF = DAG.getMachineFunction();
52275227
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5234,7 +5234,8 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
52345234
SDValue Ops[] = {
52355235
Rsrc,
52365236
Offset, // Offset
5237-
GLC // glc
5237+
GLC,
5238+
DLC,
52385239
};
52395240
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
52405241
DAG.getVTList(VT), Ops, VT, MMO);
@@ -5442,9 +5443,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
54425443
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
54435444
SDLoc(Op), MVT::i32);
54445445
case Intrinsic::amdgcn_s_buffer_load: {
5445-
unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
5446-
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
5447-
DAG.getTargetConstant(Cache & 1, DL, MVT::i1), DAG);
5446+
bool IsGFX10 = Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10;
5447+
SDValue GLC;
5448+
SDValue DLC = DAG.getTargetConstant(0, DL, MVT::i1);
5449+
if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
5450+
IsGFX10 ? &DLC : nullptr))
5451+
return Op;
5452+
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), GLC, DLC,
5453+
DAG);
54485454
}
54495455
case Intrinsic::amdgcn_fdiv_fast:
54505456
return lowerFDIV_FAST(Op, DAG);

lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
6060
SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
6161
SelectionDAG &DAG) const;
6262
SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
63-
SDValue GLC, SelectionDAG &DAG) const;
63+
SDValue GLC, SDValue DLC, SelectionDAG &DAG) const;
6464

6565
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
6666
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;

lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ def SIEncodingFamily {
3939
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;
4040

4141
def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
42-
SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>]>,
42+
SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
43+
SDTCisVT<4, i1>]>,
4344
[SDNPMayLoad, SDNPMemOperand]
4445
>;
4546

lib/Target/AMDGPU/SMInstructions.td

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -768,21 +768,23 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
768768
multiclass SMLoad_Pattern <string Instr, ValueType vt> {
769769
// 1. Offset as an immediate
770770
def : GCNPat <
771-
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc),
772-
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), 0))
771+
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
772+
(vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc),
773+
(as_i1imm $dlc)))
773774
>;
774775

775776
// 2. 32-bit IMM offset on CI
776777
def : GCNPat <
777-
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc)),
778-
(!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), 0)> {
778+
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
779+
(!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> {
779780
let OtherPredicates = [isGFX7Only];
780781
}
781782

782783
// 3. Offset loaded in an 32bit SGPR
783784
def : GCNPat <
784-
(SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc),
785-
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), 0))
785+
(SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
786+
(vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc),
787+
(as_i1imm $dlc)))
786788
>;
787789
}
788790

test/CodeGen/AMDGPU/smrd-gfx10.ll

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefixes=GCN %s
2+
3+
; GCN-LABEL: {{^}}smrd_imm_dlc:
4+
; GCN: s_buffer_load_dword s0, s[0:3], 0x0 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0xfa]
5+
define amdgpu_ps float @smrd_imm_dlc(<4 x i32> inreg %desc) #0 {
6+
main_body:
7+
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 4)
8+
ret float %r
9+
}
10+
11+
; GCN-LABEL: {{^}}smrd_sgpr_dlc:
12+
; GCN: s_buffer_load_dword s0, s[0:3], s4 dlc ; encoding: [0x00,0x40,0x20,0xf4,0x00,0x00,0x00,0x08]
13+
define amdgpu_ps float @smrd_sgpr_dlc(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
14+
main_body:
15+
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 4)
16+
ret float %r
17+
}
18+
19+
; GCN-LABEL: {{^}}smrd_imm_glc_dlc:
20+
; GCN: s_buffer_load_dword s0, s[0:3], 0x0 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0xfa]
21+
define amdgpu_ps float @smrd_imm_glc_dlc(<4 x i32> inreg %desc) #0 {
22+
main_body:
23+
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 0, i32 5)
24+
ret float %r
25+
}
26+
27+
; GCN-LABEL: {{^}}smrd_sgpr_glc_dlc:
28+
; GCN: s_buffer_load_dword s0, s[0:3], s4 glc dlc ; encoding: [0x00,0x40,0x21,0xf4,0x00,0x00,0x00,0x08]
29+
define amdgpu_ps float @smrd_sgpr_glc_dlc(<4 x i32> inreg %desc, i32 inreg %offset) #0 {
30+
main_body:
31+
%r = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %desc, i32 %offset, i32 5)
32+
ret float %r
33+
}
34+
35+
declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32)
36+
37+
!0 = !{}

0 commit comments

Comments
 (0)