Skip to content

Commit 2d399d1

Browse files
AlexVlxronlieb
authored andcommitted
[clang][OpenCL][CodeGen][AMDGPU] Do not use private as the default AS for when generic is available (llvm#112442)
Currently, for AMDGPU, when compiling for OpenCL, we unconditionally use `private` as the default address space. This is wrong for cases where the `generic` address space is available, and is corrected via this patch. In general, this AS map abuse is a bad hack and we should re-work it altogether, but at least after this patch we will stop being incorrect for e.g. OpenCL 2.0.
1 parent 07ba9a2 commit 2d399d1

20 files changed

+1087
-498
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -269,9 +269,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
269269
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
270270
TargetInfo::adjust(Diags, Opts);
271271
// ToDo: There are still a few places using default address space as private
272-
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
273-
// can be removed from the following line.
274-
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
272+
// address space in OpenCL, which needs to be cleaned up, then the references
273+
// to OpenCL can be removed from the following line.
274+
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
275275
!isAMDGCN(getTriple()));
276276
}
277277

clang/lib/CodeGen/CGBlocks.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1375,7 +1375,8 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
13751375
DI->setLocation(D->getLocation());
13761376
DI->EmitDeclareOfBlockLiteralArgVariable(
13771377
*BlockInfo, D->getName(), argNum,
1378-
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
1378+
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
1379+
Builder);
13791380
}
13801381
}
13811382

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6240,8 +6240,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
62406240
/*IndexTypeQuals=*/0);
62416241
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
62426242
llvm::Value *TmpPtr = Tmp.getPointer();
6243+
// The EmitLifetime* pair expect a naked Alloca as their last argument,
6244+
// however for cases where the default AS is not the Alloca AS, Tmp is
6245+
// actually the Alloca ascasted to the default AS, hence the
6246+
// stripPointerCasts()
6247+
llvm::Value *Alloca = TmpPtr->stripPointerCasts();
62436248
llvm::Value *TmpSize = EmitLifetimeStart(
6244-
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
6249+
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
62456250
llvm::Value *ElemPtr;
62466251
// Each of the following arguments specifies the size of the corresponding
62476252
// argument passed to the enqueued block.
@@ -6257,7 +6262,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
62576262
Builder.CreateAlignedStore(
62586263
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
62596264
}
6260-
return std::tie(ElemPtr, TmpSize, TmpPtr);
6265+
// Return the Alloca itself rather than a potential ascast as this is only
6266+
// used by the paired EmitLifetimeEnd.
6267+
return std::tie(ElemPtr, TmpSize, Alloca);
62616268
};
62626269

62636270
// Could have events and/or varargs.

clang/test/CodeGenOpenCL/addr-space-struct-arg.cl

Lines changed: 99 additions & 70 deletions
Large diffs are not rendered by default.

clang/test/CodeGenOpenCL/amdgcn-automatic-variable.cl

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
// CL20-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
1616
// CL20-NEXT: [[ENTRY:.*:]]
1717
// CL20-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
18-
// CL20-NEXT: store ptr [[X]], ptr addrspace(5) [[X_ADDR]], align 8
19-
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[X_ADDR]], align 8
18+
// CL20-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
19+
// CL20-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8
20+
// CL20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8
2021
// CL20-NEXT: store i32 1, ptr [[TMP0]], align 4
2122
// CL20-NEXT: ret void
2223
//
@@ -54,25 +55,27 @@ void func1(int *x) {
5455
// CL20-NEXT: [[LP1:%.*]] = alloca ptr, align 8, addrspace(5)
5556
// CL20-NEXT: [[LP2:%.*]] = alloca ptr, align 8, addrspace(5)
5657
// CL20-NEXT: [[LVC:%.*]] = alloca i32, align 4, addrspace(5)
57-
// CL20-NEXT: store i32 1, ptr addrspace(5) [[LV1]], align 4
58-
// CL20-NEXT: store i32 2, ptr addrspace(5) [[LV2]], align 4
59-
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
60-
// CL20-NEXT: store i32 3, ptr addrspace(5) [[ARRAYIDX]], align 4
6158
// CL20-NEXT: [[LV1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
62-
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr addrspace(5) [[LP1]], align 8
63-
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr addrspace(5) [[LA]], i64 0, i64 0
64-
// CL20-NEXT: [[ARRAYDECAY_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARRAYDECAY]] to ptr
65-
// CL20-NEXT: store ptr [[ARRAYDECAY_ASCAST]], ptr addrspace(5) [[LP2]], align 8
66-
// CL20-NEXT: [[LV1_ASCAST1:%.*]] = addrspacecast ptr addrspace(5) [[LV1]] to ptr
67-
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST1]]) #[[ATTR2:[0-9]+]]
68-
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LVC]], align 4
69-
// CL20-NEXT: store i32 4, ptr addrspace(5) [[LV1]], align 4
59+
// CL20-NEXT: [[LV2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LV2]] to ptr
60+
// CL20-NEXT: [[LA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LA]] to ptr
61+
// CL20-NEXT: [[LP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP1]] to ptr
62+
// CL20-NEXT: [[LP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LP2]] to ptr
63+
// CL20-NEXT: [[LVC_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[LVC]] to ptr
64+
// CL20-NEXT: store i32 1, ptr [[LV1_ASCAST]], align 4
65+
// CL20-NEXT: store i32 2, ptr [[LV2_ASCAST]], align 4
66+
// CL20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
67+
// CL20-NEXT: store i32 3, ptr [[ARRAYIDX]], align 4
68+
// CL20-NEXT: store ptr [[LV1_ASCAST]], ptr [[LP1_ASCAST]], align 8
69+
// CL20-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[LA_ASCAST]], i64 0, i64 0
70+
// CL20-NEXT: store ptr [[ARRAYDECAY]], ptr [[LP2_ASCAST]], align 8
71+
// CL20-NEXT: call void @func1(ptr noundef [[LV1_ASCAST]]) #[[ATTR2:[0-9]+]]
72+
// CL20-NEXT: store i32 4, ptr [[LVC_ASCAST]], align 4
73+
// CL20-NEXT: store i32 4, ptr [[LV1_ASCAST]], align 4
7074
// CL20-NEXT: ret void
7175
//
7276
void func2(void) {
7377
int lv1;
7478
lv1 = 1;
75-
7679
int lv2 = 2;
7780

7881
int la[100];
@@ -99,7 +102,8 @@ void func2(void) {
99102
// CL20-SAME: ) #[[ATTR0]] {
100103
// CL20-NEXT: [[ENTRY:.*:]]
101104
// CL20-NEXT: [[A:%.*]] = alloca [16 x [1 x float]], align 4, addrspace(5)
102-
// CL20-NEXT: call void @llvm.memset.p5.i64(ptr addrspace(5) align 4 [[A]], i8 0, i64 64, i1 false)
105+
// CL20-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
106+
// CL20-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A_ASCAST]], i8 0, i64 64, i1 false)
103107
// CL20-NEXT: ret void
104108
//
105109
void func3(void) {

0 commit comments

Comments
 (0)