Skip to content

Commit 934c97d

Browse files
krzysz00 and arsenm authored
[LowerBufferFatPointers] Fix support for GEP T, p7, <N x T> idxs (#126126)
The lowering for GEP did not properly support the case where a scalar pointer argument is implicitly broadcast by a vector of indices. Fix that.

---------

Co-authored-by: Matt Arsenault <[email protected]>
1 parent 574ccc6 commit 934c97d

File tree

2 files changed

+33
-4
lines changed

2 files changed

+33
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1804,14 +1804,25 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
18041804
bool IsNUW = GEP.hasNoUnsignedWrap();
18051805
bool IsNUSW = GEP.hasNoUnsignedSignedWrap();
18061806

1807+
StructType *ResTy = cast<StructType>(GEP.getType());
1808+
Type *ResRsrcTy = ResTy->getElementType(0);
1809+
VectorType *ResRsrcVecTy = dyn_cast<VectorType>(ResRsrcTy);
1810+
bool BroadcastsPtr = ResRsrcVecTy && !isa<VectorType>(Off->getType());
1811+
18071812
// In order to call emitGEPOffset() and thus not have to reimplement it,
18081813
// we need the GEP result to have ptr addrspace(7) type.
1809-
Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
1810-
if (auto *VT = dyn_cast<VectorType>(Off->getType()))
1811-
FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
1814+
Type *FatPtrTy =
1815+
ResRsrcTy->getWithNewType(IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER));
18121816
GEP.mutateType(FatPtrTy);
18131817
Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
1814-
GEP.mutateType(Ptr->getType());
1818+
GEP.mutateType(ResTy);
1819+
1820+
if (BroadcastsPtr) {
1821+
Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,
1822+
Rsrc->getName());
1823+
Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,
1824+
Off->getName());
1825+
}
18151826
if (match(OffAccum, m_Zero())) { // Constant-zero offset
18161827
SplitUsers.insert(&GEP);
18171828
return {Rsrc, Off};

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,24 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64
5959
ret <2 x ptr addrspace(7)> %ret
6060
}
6161

62+
define <2 x ptr addrspace(7)> @gep_scalar_vector(ptr addrspace(7) %in, <2 x i32> %idxs) {
63+
; CHECK-LABEL: define { <2 x ptr addrspace(8)>, <2 x i32> } @gep_scalar_vector
64+
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[IN:%.*]], <2 x i32> [[IDXS:%.*]]) #[[ATTR0]] {
65+
; CHECK-NEXT: [[IN_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 0
66+
; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 1
67+
; CHECK-NEXT: [[IN_RSRC_SPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(8)> poison, ptr addrspace(8) [[IN_RSRC]], i64 0
68+
; CHECK-NEXT: [[IN_RSRC_SPLAT:%.*]] = shufflevector <2 x ptr addrspace(8)> [[IN_RSRC_SPLATINSERT]], <2 x ptr addrspace(8)> poison, <2 x i32> zeroinitializer
69+
; CHECK-NEXT: [[IN_OFF_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[IN_OFF]], i64 0
70+
; CHECK-NEXT: [[IN_OFF_SPLAT:%.*]] = shufflevector <2 x i32> [[IN_OFF_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
71+
; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF_SPLAT]], [[IDXS]]
72+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC_SPLAT]], 0
73+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
74+
; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]]
75+
;
76+
%ret = getelementptr inbounds i8, ptr addrspace(7) %in, <2 x i32> %idxs
77+
ret <2 x ptr addrspace(7)> %ret
78+
}
79+
6280
define ptr addrspace(7) @simple_gep(ptr addrspace(7) %ptr, i32 %off) {
6381
; CHECK-LABEL: define { ptr addrspace(8), i32 } @simple_gep
6482
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]], i32 [[OFF:%.*]]) #[[ATTR0]] {

0 commit comments

Comments
 (0)