Commit 6e0b003

[clang][OpenCL][CodeGen][AMDGPU] Do not use private as the default AS for when generic is available (#112442)
Currently, for AMDGPU, when compiling for OpenCL, we unconditionally use `private` as the default address space. This is wrong when the `generic` address space is available, and this patch corrects it. In general, this abuse of the address-space map is a hack that we should rework altogether, but at least after this patch we stop being incorrect for, e.g., OpenCL 2.0.
1 parent aea60ab commit 6e0b003

20 files changed: +1154 -549 lines
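
For context, here is a minimal OpenCL 2.0 kernel (a hypothetical illustration, not one of the commit's tests) where the default address space matters: the address of an automatic variable lives in the private address space (`addrspace(5)` on AMDGPU), and once `generic` is the default it has to be addrspacecast rather than typed as `private`.

```c
// Hypothetical example, assuming -cl-std=CL2.0: the unqualified pointer type
// defaults to the generic address space, so &x, which points into the private
// AS (addrspace(5) on AMDGPU), is implicitly addrspacecast to generic.
kernel void example(global int *out) {
  int x = 42;   // automatic variable in the private address space
  int *p = &x;  // unqualified pointer: generic AS when generic is the default
  *out = *p;
}
```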

clang/lib/Basic/Targets/AMDGPU.cpp

+3-3
```diff
@@ -260,9 +260,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
   TargetInfo::adjust(Diags, Opts);
   // ToDo: There are still a few places using default address space as private
-  // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
-  // can be removed from the following line.
-  setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
+  // address space in OpenCL, which needs to be cleaned up, then the references
+  // to OpenCL can be removed from the following line.
+  setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
                      !isAMDGCN(getTriple()));
 }
```
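
The new condition keys the private-by-default map on `Opts.OpenCL && !Opts.OpenCLGenericAddressSpace` rather than on `Opts.OpenCL` alone, so only OpenCL modes without the generic address space keep the old default. A hedged sketch of the user-visible difference (hypothetical snippet and assumed mode behaviour, not part of the commit):

```c
// Hypothetical probe kernel. Assumed behaviour per language mode:
//   CL1.2 (no generic AS)                        -> default AS stays private
//   CL2.0                                        -> default AS is generic
//   CL3.0 with __opencl_c_generic_address_space  -> default AS is generic
kernel void probe(void) {
  int v = 0;
  int *p = &v; // the type of this unqualified pointer follows the default AS
  *p = 1;
}
```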

clang/lib/CodeGen/CGBlocks.cpp

+2-1
```diff
@@ -1397,7 +1397,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
       DI->setLocation(D->getLocation());
       DI->EmitDeclareOfBlockLiteralArgVariable(
           *BlockInfo, D->getName(), argNum,
-          cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
+          cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
+          Builder);
     }
   }
```
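
The block-literal parameter handled here comes from lowering an OpenCL block to its invoke function; a minimal sketch that reaches this path (hypothetical kernel, assumes -cl-std=CL2.0 with debug info enabled, not one of the commit's tests):

```c
// Hypothetical kernel: the block passed to enqueue_kernel is lowered to an
// invoke function whose implicit block-literal parameter goes through
// setBlockContextParameter. With a generic default AS, the temporary holding
// it is an addrspace(5) alloca addrspacecast to generic, hence the
// stripPointerCasts() before handing the alloca to the debug-info helper.
kernel void launch(global int *out) {
  int captured = 7;
  queue_t q = get_default_queue();
  enqueue_kernel(q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(1),
                 ^(void) { out[0] = captured; });
}
```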

clang/lib/CodeGen/CGBuiltin.cpp

+9-2
```diff
@@ -5853,8 +5853,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
           /*IndexTypeQuals=*/0);
       auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
       llvm::Value *TmpPtr = Tmp.getPointer();
+      // The EmitLifetime* pair expect a naked Alloca as their last argument,
+      // however for cases where the default AS is not the Alloca AS, Tmp is
+      // actually the Alloca ascasted to the default AS, hence the
+      // stripPointerCasts()
+      llvm::Value *Alloca = TmpPtr->stripPointerCasts();
       llvm::Value *TmpSize = EmitLifetimeStart(
-          CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
+          CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
       llvm::Value *ElemPtr;
       // Each of the following arguments specifies the size of the corresponding
       // argument passed to the enqueued block.
@@ -5870,7 +5875,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
         Builder.CreateAlignedStore(
             V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
       }
-      return std::tie(ElemPtr, TmpSize, TmpPtr);
+      // Return the Alloca itself rather than a potential ascast as this is only
+      // used by the paired EmitLifetimeEnd.
+      return std::tie(ElemPtr, TmpSize, Alloca);
     };

     // Could have events and/or varargs.
```
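
The `block_sizes` temporary above is created for the `enqueue_kernel` form that passes trailing size arguments for the block's `local` parameters; a minimal sketch that reaches this code path (hypothetical kernel, assumes -cl-std=CL2.0, not one of the commit's tests):

```c
// Hypothetical kernel: the trailing (uint)(64 * sizeof(int)) argument is the
// size of the block's __local parameter. Such sizes are stored into the
// "block_sizes" temporary, whose lifetime.start/lifetime.end markers must be
// given the raw addrspace(5) alloca rather than its addrspacecast.
kernel void launch_with_locals(void) {
  queue_t q = get_default_queue();
  enqueue_kernel(q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(64),
                 ^(local void *scratch) {
                   local int *tmp = (local int *)scratch;
                   tmp[get_local_id(0)] = (int)get_global_id(0);
                 },
                 (uint)(64 * sizeof(int)));
}
```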

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

+99-70
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl

+20-16
```diff
@@ -15,8 +15,9 @@
 // CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 // CL20-NEXT: [[ENTRY:.*:]]
 // CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8
-// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8
+// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
+// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
 // CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
 // CL20-NEXT: ret void
 //
@@ -54,25 +55,27 @@ void func1(int *x) {
 // CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
 // CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
 // CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
-// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
-// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
-// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
-// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
 // CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
-// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
-// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
-// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
-// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8
-// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
-// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]]
-// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
-// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
+// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
+// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
+// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
+// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
+// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
+// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
+// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
+// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
+// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
+// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
+// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
+// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
+// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
+// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
+// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
 // CL20-NEXT: ret void
 //
 void func2(void) {
   int lv1;
   lv1 = 1;
-
   int lv2 = 2;

   int la[100];
@@ -99,7 +102,8 @@ void func2(void) {
 // CL20-SAME: ) #[[ATTR0]] {
 // CL20-NEXT: [[ENTRY:.*:]]
 // CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
-// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
+// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
 // CL20-NEXT: ret void
 //
 void func3(void) {
```
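
For orientation, the CL20 CHECK lines in the first hunk correspond to a function along these lines (reconstructed from those CHECK lines; the test's full source is not part of this diff):

```c
// Reconstructed from the CHECK lines above: func1 stores through a plain
// pointer parameter, which is a generic pointer under -cl-std=CL2.0.
void func1(int *x) {
  *x = 1;
}
```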
