Skip to content

Commit d1a0572

Browse files
authored
[AMDGPU] Add buffer.fat.ptr.load.lds intrinsic wrapping raw rsrc version (#133015)
Add a buffer_fat_ptr_load_lds intrinsic, by analogy with global_load_lds, which enables using `ptr addrspace(7)` to set the rsrc and offset arguments to raw_ptr_buffer_load_lds.
1 parent 15750a0 commit d1a0572

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1941,6 +1941,27 @@ def int_amdgcn_s_buffer_prefetch_data : DefaultAttrsIntrinsic <
19411941

19421942
} // defset AMDGPUBufferIntrinsics
19431943

1944+
// A wrapper around raw_ptr_buffer_load_lds that takes both the buffer
1945+
// resource and the global offset from the addrspace(7) pointer argument.
// AMDGPULowerBufferFatPointers rewrites calls to this intrinsic into the
// raw_ptr form, supplying a constant 0 for the scalar offset operand.
1946+
def int_amdgcn_buffer_fat_ptr_load_lds : Intrinsic <
1947+
[],
1948+
[LLVMQualPointerType<7>, // buffer fat pointer (SGPRx4 + VGPR)
1949+
LLVMQualPointerType<3>, // LDS base offset
1950+
llvm_i32_ty, // Data byte size(imm): 1/2/4 (/12/16 for gfx950)
1951+
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
1952+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1953+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1954+
// bit 3 = swz, bit 4 = scc (gfx90a)
1955+
// gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1956+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1957+
// bit 6 = swz
1958+
// all: volatile op (bit 31, stripped at lowering)
// Attributes: argument 0 (the buffer) is marked read-only and argument 1 (the
// LDS destination) write-only; neither pointer is captured. Arguments 2-4
// must be immediates.
1959+
[IntrWillReturn, IntrArgMemOnly,
1960+
ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
1961+
WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
1962+
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
1963+
ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>;
1964+
19441965
// Uses that do not set the done bit should set IntrWriteMem on the
19451966
// call site.
19461967
def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2167,6 +2167,7 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
21672167
case Intrinsic::memset:
21682168
case Intrinsic::memset_inline:
21692169
case Intrinsic::experimental_memset_pattern:
2170+
case Intrinsic::amdgcn_buffer_fat_ptr_load_lds:
21702171
return true;
21712172
}
21722173
}
@@ -2255,6 +2256,26 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
22552256
SplitUsers.insert(&I);
22562257
return {NewRsrc, Off};
22572258
}
2259+
case Intrinsic::amdgcn_buffer_fat_ptr_load_lds: {
// Rewrite the fat-pointer form of the load-to-LDS intrinsic into
// raw_ptr_buffer_load_lds by splitting argument 0 into its resource and
// offset parts and forwarding the remaining operands unchanged.
2260+
Value *BufferPtr = I.getArgOperand(0);
2261+
assert(isSplitFatPtr(BufferPtr->getType()) &&
2262+
"amdgcn.buffer.fat.ptr.load.lds must have a buffer fat pointer "
2263+
"as argument 0");
2264+
IRB.SetInsertPoint(&I);
2265+
auto [Rsrc, Off] = getPtrParts(BufferPtr);
2266+
Value *LDSPtr = I.getArgOperand(1);
2267+
Value *LoadSize = I.getArgOperand(2);
2268+
Value *ImmOff = I.getArgOperand(3);
2269+
Value *Aux = I.getArgOperand(4);
// The fat pointer carries no scalar offset, so a constant 0 is passed for it.
2270+
Value *SOffset = IRB.getInt32(0);
2271+
Instruction *NewLoad = IRB.CreateIntrinsic(
2272+
Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},
2273+
{Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
2274+
copyMetadata(NewLoad, &I);
2275+
SplitUsers.insert(&I);
2276+
I.replaceAllUsesWith(NewLoad);
// This call does not produce a fat pointer, so there are no parts to return.
2277+
return {nullptr, nullptr};
2278+
}
22582279
}
22592280
return {nullptr, nullptr};
22602281
}

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,3 +1724,21 @@ define void @memset_pattern_unknown(ptr addrspace(7) inreg %ptr, i32 inreg %leng
17241724
call void @llvm.experimental.memset.pattern.p7.i32.i32(ptr addrspace(7) %ptr, i32 1, i32 %length, i1 false)
17251725
ret void
17261726
}
1727+
1728+
;;; Buffer load to LDS
1729+
1730+
declare void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7), ptr addrspace(3), i32 immarg, i32 immarg, i32 immarg)
1731+
1732+
; Exercises lowering of llvm.amdgcn.buffer.fat.ptr.load.lds when the buffer
; argument is a GEP of a fat pointer. The expected output calls
; llvm.amdgcn.raw.ptr.buffer.load.lds with the resource part of %p, the LDS
; pointer, the size (4), the offset part of %p plus %idx, an inserted constant
; 0, and the original immediate offset (16) and aux (0) operands.
define void @llvm_amdgcn_buffer_fat_ptr_load_lds(ptr addrspace(7) inreg %p, ptr addrspace(3) inreg %l, i32 %idx) {
1733+
; CHECK-LABEL: define void @llvm_amdgcn_buffer_fat_ptr_load_lds(
1734+
; CHECK-SAME: { ptr addrspace(8), i32 } inreg [[P:%.*]], ptr addrspace(3) inreg [[L:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
1735+
; CHECK-NEXT: [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0
1736+
; CHECK-NEXT: [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1
1737+
; CHECK-NEXT: [[Q:%.*]] = add i32 [[P_OFF]], [[IDX]]
1738+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[P_RSRC]], ptr addrspace(3) [[L]], i32 4, i32 [[Q]], i32 0, i32 16, i32 0)
1739+
; CHECK-NEXT: ret void
1740+
;
1741+
%q = getelementptr i8, ptr addrspace(7) %p, i32 %idx
1742+
call void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7) %q, ptr addrspace(3) %l, i32 4, i32 16, i32 0)
1743+
ret void
1744+
}

0 commit comments

Comments
 (0)