Skip to content

Commit 664334f

Browse files
author
Yong He
committed
Support relocation for descriptor offsets.
This is the original patch, authored by stevenperron, that adds relocation support for descriptor offsets to the AMDGPU backend.
1 parent 1ebc431 commit 664334f

File tree

6 files changed

+128
-4
lines changed

6 files changed

+128
-4
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1986,4 +1986,10 @@ def int_amdgcn_fdiv_fast : Intrinsic<
19861986
[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
19871987
[IntrNoMem, IntrSpeculatable]
19881988
>;
1989+
1990+
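// Represents a relocation constant: yields an i32 that is filled in through a relocation against the symbol named by the first (MDString) operand of the metadata argument.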
1991+
def int_amdgcn_reloc_constant : Intrinsic<
1992+
[llvm_i32_ty], [llvm_metadata_ty],
1993+
[IntrNoMem, IntrSpeculatable]
1994+
>;
19891995
}

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,6 +1607,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
16071607
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
16081608
}
16091609

1610+
if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) {
1611+
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
1612+
}
16101613
llvm_unreachable("Can't get register for value!");
16111614
}
16121615

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,11 +1760,23 @@ bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDNode *N,
17601760

17611761
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
17621762
SDValue &Offset, bool &Imm) const {
1763-
1764-
// FIXME: Handle non-constant offsets.
17651763
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
1766-
if (!C)
1764+
if (!C) {
1765+
if (ByteOffsetNode.getValueType().isScalarInteger() &&
1766+
ByteOffsetNode.getValueType().getSizeInBits() == 32) {
1767+
Offset = ByteOffsetNode;
1768+
Imm = false;
1769+
return true;
1770+
}
1771+
if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
1772+
if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
1773+
Offset = ByteOffsetNode.getOperand(0);
1774+
Imm = false;
1775+
return true;
1776+
}
1777+
}
17671778
return false;
1779+
}
17681780

17691781
SDLoc SL(ByteOffsetNode);
17701782
GCNSubtarget::Generation Gen = Subtarget->getGeneration();
@@ -1829,7 +1841,8 @@ bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
18291841
// wraparound, because s_load instructions perform the addition in 64 bits.
18301842
if ((Addr.getValueType() != MVT::i32 ||
18311843
Addr->getFlags().hasNoUnsignedWrap()) &&
1832-
CurDAG->isBaseWithConstantOffset(Addr)) {
1844+
(CurDAG->isBaseWithConstantOffset(Addr) ||
1845+
Addr.getOpcode() == ISD::ADD)) {
18331846
SDValue N0 = Addr.getOperand(0);
18341847
SDValue N1 = Addr.getOperand(1);
18351848

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6046,6 +6046,16 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
60466046
DAG.getConstant(1, SL, MVT::i32));
60476047
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
60486048
}
6049+
case Intrinsic::amdgcn_reloc_constant: {
6050+
Module *M = const_cast<Module *>(MF.getFunction().getParent());
6051+
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
6052+
auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
6053+
auto RelocSymbol = cast<GlobalVariable>(
6054+
M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
6055+
SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
6056+
SIInstrInfo::MO_ABS32_LO);
6057+
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
6058+
}
60496059
default:
60506060
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
60516061
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; Test that DAG->DAG ISel is able to pick up the S_LOAD_DWORDX4_SGPR instruction that fetches the offset
2+
; from a register.
3+
4+
; RUN: llc -march=amdgcn -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
5+
6+
; GCN: %[[OFFSET:[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @DescriptorBuffer
7+
; GCN: %{{[0-9]+}}:sgpr_128 = S_LOAD_DWORDX4_SGPR killed %{{[0-9]+}}, killed %[[OFFSET]], 0, 0 :: (invariant load 16 from %ir.13, addrspace 4)
8+
9+
define amdgpu_cs void @test_load_zext(i32 inreg %0, i32 inreg %1, i32 inreg %resNode0, i32 inreg %resNode1, <3 x i32> inreg %2, i32 inreg %3, <3 x i32> %4) local_unnamed_addr #2 {
10+
.entry:
11+
%5 = call i64 @llvm.amdgcn.s.getpc() #3
12+
%6 = bitcast i64 %5 to <2 x i32>
13+
%7 = insertelement <2 x i32> %6, i32 %resNode0, i32 0
14+
%8 = bitcast <2 x i32> %7 to i64
15+
%9 = inttoptr i64 %8 to [4294967295 x i8] addrspace(4)*
16+
%10 = call i32 @llvm.amdgcn.reloc.constant(metadata !4)
17+
%11 = zext i32 %10 to i64
18+
%12 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %9, i64 0, i64 %11
19+
%13 = bitcast i8 addrspace(4)* %12 to <4 x i32> addrspace(4)*, !amdgpu.uniform !5
20+
%14 = load <4 x i32>, <4 x i32> addrspace(4)* %13, align 16, !invariant.load !5
21+
%15 = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %14, i32 0, i32 0)
22+
call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %15, <4 x i32> %14, i32 0, i32 0, i32 0)
23+
ret void
24+
}
25+
26+
declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
27+
; Function Attrs: nounwind writeonly
28+
; The buffer store writes memory, so it uses the writeonly attribute group (#4) rather than the readnone group (#1).
declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #4
29+
30+
; Function Attrs: nounwind readnone speculatable
31+
declare i32 @llvm.amdgcn.reloc.constant(metadata) #3
32+
33+
; Function Attrs: nounwind readnone speculatable
34+
declare i64 @llvm.amdgcn.s.getpc() #3
35+
36+
; Function Attrs: nounwind readnone
37+
declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #1
38+
39+
attributes #0 = { argmemonly nounwind willreturn }
40+
attributes #1 = { nounwind readnone }
41+
attributes #2 = { nounwind "amdgpu-unroll-threshold"="700" }
42+
attributes #3 = { nounwind readnone speculatable }
43+
attributes #4 = { nounwind writeonly }
44+
45+
!llpc.compute.mode = !{!0}
46+
!llpc.options = !{!1}
47+
!llpc.options.CS = !{!2}
48+
!llpc.user.data.nodes = !{!3, !4, !5, !6}
49+
!amdgpu.pal.metadata.msgpack = !{!7}
50+
51+
!0 = !{i32 2, i32 3, i32 1}
52+
!1 = !{i32 245227952, i32 996822128, i32 2024708198, i32 497230408}
53+
!2 = !{i32 1381820427, i32 1742110173, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 64}
54+
!3 = !{!"DescriptorTableVaPtr", i32 0, i32 1, i32 1}
55+
!4 = !{!"DescriptorBuffer", i32 4, i32 8, i32 0, i32 0}
56+
!5 = !{!"DescriptorTableVaPtr", i32 1, i32 1, i32 1}
57+
!6 = !{!"DescriptorBuffer", i32 4, i32 8, i32 1, i32 0}
58+
!7 = !{!"\82\B0amdpal.pipelines\91\88\A4.api\A6Vulkan\B0.hardware_stages\81\A3.cs\82\AB.sgpr_limith\AB.vgpr_limit\CD\01\00\B7.internal_pipeline_hash\92\CF;jLp\0E\9D\E1\B0\CF\1D\A3\22Hx\AE\98f\AA.registers\88\CD.\07\02\CD.\08\03\CD.\09\01\CD.\12\CE\00,\00\00\CD.\13\CD\0F\88\CD.@\CE\10\00\00\00\CD.B\00\CD.C\01\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CFg\D6}\DDR\\\E8\0B\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\02\AEamdpal.version\92\02\03"}
59+
!8 = !{i32 5}
60+
!9 = !{!"doff_0_0_b"}
61+
!10 = !{}
62+
!11 = !{!"doff_1_0_b"}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2+
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -filetype=obj -o %t.o < %s && llvm-readobj -relocations %t.o | FileCheck --check-prefix=ELF %s
3+
; GCN-LABEL: {{^}}ps_main:
4+
; GCN: v_mov_b32_{{.*}} v[[relocreg:[0-9]+]], doff_0_0_b@abs32@lo
5+
; GCN-NEXT: exp {{.*}} v[[relocreg]], {{.*}}
6+
; GCN-NEXT: s_endpgm
7+
; GCN-NEXT: .Lfunc_end
8+
9+
; ELF: Relocations [
10+
; ELF-NEXT: Section (3) .rel.text {
11+
; ELF-NEXT: 0x{{[0-9]+}} R_AMDGPU_ABS32 doff_0_0_b {{.*}}
12+
13+
define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local_unnamed_addr #0 {
14+
%rc = call i32 @llvm.amdgcn.reloc.constant(metadata !1)
15+
%rcf = bitcast i32 %rc to float
16+
call void @llvm.amdgcn.exp.f32(i32 immarg 40, i32 immarg 15, float %rcf, float undef, float undef, float undef, i1 immarg false, i1 immarg false) #0
17+
ret void
18+
}
19+
20+
; Function Attrs: inaccessiblememonly nounwind
21+
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
22+
23+
; Function Attrs: nounwind readnone speculatable
24+
declare i32 @llvm.amdgcn.reloc.constant(metadata) #2
25+
26+
attributes #0 = { nounwind }
27+
attributes #1 = { inaccessiblememonly nounwind }
28+
attributes #2 = { nounwind readnone speculatable }
29+
30+
!1 = !{!"doff_0_0_b"}

0 commit comments

Comments
 (0)