Skip to content

Commit 904ed21

Browse files
committed
[AMDGPU] Add buffer.fat.ptr.load.lds intrinsic wrapping raw rsrc version
Add a buffer_fat_ptr_load_lds intrinsic, by analogy with global_load_lds, which enables using `ptr addrspace(7)` to set the rsrc and offset arguments to raw_ptr_buffer_load_lds.
1 parent df01131 commit 904ed21

File tree

3 files changed

+60
-0
lines changed

3 files changed

+60
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1939,6 +1939,27 @@ def int_amdgcn_s_buffer_prefetch_data : DefaultAttrsIntrinsic <
19391939

19401940
} // defset AMDGPUBufferIntrinsics
19411941

1942+
// A wrapper around raw_ptr_buffer_load_lds that takes both the buffer resource
1943+
// (rsrc) and the offset from the addrspace(7) pointer argument.
1944+
def int_amdgcn_buffer_fat_ptr_load_lds : Intrinsic <
1945+
[],
1946+
[LLVMQualPointerType<7>, // buffer fat pointer (SGPRx4 + VGPR)
1947+
LLVMQualPointerType<3>, // LDS base offset
1948+
llvm_i32_ty, // Data byte size: 1/2/4 (/12/16 for gfx950)
1949+
llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
1950+
llvm_i32_ty], // auxiliary/cachepolicy(imm):
1951+
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
1952+
// bit 3 = swz, bit 4 = scc (gfx90a)
1953+
// gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
1954+
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
1955+
// bit 6 = swz
1956+
// all: volatile op (bit 31, stripped at lowering)
1957+
[IntrWillReturn, IntrArgMemOnly,
1958+
ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
1959+
WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
1960+
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
1961+
ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>;
1962+
19421963
// Uses that do not set the done bit should set IntrWriteMem on the
19431964
// call site.
19441965
def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2157,6 +2157,7 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
21572157
case Intrinsic::memset:
21582158
case Intrinsic::memset_inline:
21592159
case Intrinsic::experimental_memset_pattern:
2160+
case Intrinsic::amdgcn_buffer_fat_ptr_load_lds:
21602161
return true;
21612162
}
21622163
}
@@ -2245,6 +2246,26 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
22452246
SplitUsers.insert(&I);
22462247
return {NewRsrc, Off};
22472248
}
2249+
case Intrinsic::amdgcn_buffer_fat_ptr_load_lds: {
2250+
Value *BufferPtr = I.getArgOperand(0);
2251+
assert(isSplitFatPtr(BufferPtr->getType()) &&
2252+
"amdgcn.buffer.fat.pointer.load.lds has a buffer fat pointer as "
2253+
"argument 0");
2254+
IRB.SetInsertPoint(&I);
2255+
auto [Rsrc, Off] = getPtrParts(BufferPtr);
2256+
Value *LDSPtr = I.getArgOperand(1);
2257+
Value *LoadSize = I.getArgOperand(2);
2258+
Value *ImmOff = I.getArgOperand(3);
2259+
Value *Aux = I.getArgOperand(4);
2260+
Value *SOffset = IRB.getInt32(0);
2261+
Instruction *NewLoad = IRB.CreateIntrinsic(
2262+
Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},
2263+
{Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
2264+
copyMetadata(NewLoad, &I);
2265+
SplitUsers.insert(&I);
2266+
I.replaceAllUsesWith(NewLoad);
2267+
return {nullptr, nullptr};
2268+
}
22482269
}
22492270
return {nullptr, nullptr};
22502271
}

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,3 +1728,21 @@ define void @memset_pattern_unknown(ptr addrspace(7) inreg %ptr, i32 inreg %leng
17281728
call void @llvm.experimental.memset.pattern.p7.i32.i32(ptr addrspace(7) %ptr, i32 1, i32 %length, i1 false)
17291729
ret void
17301730
}
1731+
1732+
;;; Buffer load to LDS
1733+
1734+
declare void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7), ptr addrspace(3), i32 immarg, i32 immarg, i32 immarg)
1735+
1736+
define void @llvm_amdgcn_buffer_fat_ptr_load_lds(ptr addrspace(7) inreg %p, ptr addrspace(3) inreg %l, i32 %idx) {
1737+
; CHECK-LABEL: define void @llvm_amdgcn_buffer_fat_ptr_load_lds(
1738+
; CHECK-SAME: { ptr addrspace(8), i32 } inreg [[P:%.*]], ptr addrspace(3) inreg [[L:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
1739+
; CHECK-NEXT: [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0
1740+
; CHECK-NEXT: [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1
1741+
; CHECK-NEXT: [[Q:%.*]] = add i32 [[P_OFF]], [[IDX]]
1742+
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[P_RSRC]], ptr addrspace(3) [[L]], i32 4, i32 [[Q]], i32 0, i32 16, i32 0)
1743+
; CHECK-NEXT: ret void
1744+
;
1745+
%q = getelementptr i8, ptr addrspace(7) %p, i32 %idx
1746+
call void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7) %q, ptr addrspace(3) %l, i32 4, i32 16, i32 0)
1747+
ret void
1748+
}

0 commit comments

Comments
 (0)