Skip to content

Commit 2cb3798

Browse files
arsenm authored and tstellar committed
Revert "[clang][OpenCL][CodeGen][AMDGPU] Do not use private as the default AS for when generic is available (#112442)"
This reverts commit 6e0b003. The reverted change breaks the rocm-device-libs build, so it should not ship in the release.
1 parent c99be91 commit 2cb3798

21 files changed: 663 additions (+663) and 1209 deletions (-1209)

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -261,9 +261,9 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
261261
void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
262262
TargetInfo::adjust(Diags, Opts);
263263
// ToDo: There are still a few places using default address space as private
264-
// address space in OpenCL, which needs to be cleaned up, then the references
265-
// to OpenCL can be removed from the following line.
266-
setAddressSpaceMap((Opts.OpenCL && !Opts.OpenCLGenericAddressSpace) ||
264+
// address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
265+
// can be removed from the following line.
266+
setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
267267
!isAMDGCN(getTriple()));
268268
}
269269

clang/lib/CodeGen/CGBlocks.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1396,8 +1396,7 @@ void CodeGenFunction::setBlockContextParameter(const ImplicitParamDecl *D,
13961396
DI->setLocation(D->getLocation());
13971397
DI->EmitDeclareOfBlockLiteralArgVariable(
13981398
*BlockInfo, D->getName(), argNum,
1399-
cast<llvm::AllocaInst>(alloc.getPointer()->stripPointerCasts()),
1400-
Builder);
1399+
cast<llvm::AllocaInst>(alloc.getPointer()), Builder);
14011400
}
14021401
}
14031402

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -6092,13 +6092,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
60926092
/*IndexTypeQuals=*/0);
60936093
auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
60946094
llvm::Value *TmpPtr = Tmp.getPointer();
6095-
// The EmitLifetime* pair expect a naked Alloca as their last argument,
6096-
// however for cases where the default AS is not the Alloca AS, Tmp is
6097-
// actually the Alloca ascasted to the default AS, hence the
6098-
// stripPointerCasts()
6099-
llvm::Value *Alloca = TmpPtr->stripPointerCasts();
61006095
llvm::Value *TmpSize = EmitLifetimeStart(
6101-
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), Alloca);
6096+
CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
61026097
llvm::Value *ElemPtr;
61036098
// Each of the following arguments specifies the size of the corresponding
61046099
// argument passed to the enqueued block.
@@ -6114,9 +6109,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
61146109
Builder.CreateAlignedStore(
61156110
V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));
61166111
}
6117-
// Return the Alloca itself rather than a potential ascast as this is only
6118-
// used by the paired EmitLifetimeEnd.
6119-
return std::tie(ElemPtr, TmpSize, Alloca);
6112+
return std::tie(ElemPtr, TmpSize, TmpPtr);
61206113
};
61216114

61226115
// Could have events and/or varargs.

clang/test/CodeGen/scoped-fence-ops.c

Lines changed: 120 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
22
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
3-
// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
3+
// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL12 %s
44
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
5-
// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s
5+
// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN-CL20 %s
66
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
77
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s
88
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=x86_64-unknown-linux-gnu -ffreestanding \
@@ -30,34 +30,62 @@ void fe1a() {
3030
__scoped_atomic_thread_fence(__ATOMIC_RELEASE, __MEMORY_SCOPE_WRKGRP);
3131
}
3232

33-
// AMDGCN-LABEL: define hidden void @fe1b(
34-
// AMDGCN-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
35-
// AMDGCN-NEXT: [[ENTRY:.*:]]
36-
// AMDGCN-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
37-
// AMDGCN-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
38-
// AMDGCN-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
39-
// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
40-
// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
41-
// AMDGCN-NEXT: i32 1, label %[[ACQUIRE:.*]]
42-
// AMDGCN-NEXT: i32 2, label %[[ACQUIRE]]
43-
// AMDGCN-NEXT: i32 3, label %[[RELEASE:.*]]
44-
// AMDGCN-NEXT: i32 4, label %[[ACQREL:.*]]
45-
// AMDGCN-NEXT: i32 5, label %[[SEQCST:.*]]
46-
// AMDGCN-NEXT: ]
47-
// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
48-
// AMDGCN-NEXT: ret void
49-
// AMDGCN: [[ACQUIRE]]:
50-
// AMDGCN-NEXT: fence syncscope("workgroup") acquire
51-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
52-
// AMDGCN: [[RELEASE]]:
53-
// AMDGCN-NEXT: fence syncscope("workgroup") release
54-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
55-
// AMDGCN: [[ACQREL]]:
56-
// AMDGCN-NEXT: fence syncscope("workgroup") acq_rel
57-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
58-
// AMDGCN: [[SEQCST]]:
59-
// AMDGCN-NEXT: fence syncscope("workgroup") seq_cst
60-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
33+
// AMDGCN-CL12-LABEL: define hidden void @fe1b(
34+
// AMDGCN-CL12-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
35+
// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
36+
// AMDGCN-CL12-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
37+
// AMDGCN-CL12-NEXT: [[ORD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ORD_ADDR]] to ptr
38+
// AMDGCN-CL12-NEXT: store i32 [[ORD]], ptr [[ORD_ADDR_ASCAST]], align 4
39+
// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[ORD_ADDR_ASCAST]], align 4
40+
// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
41+
// AMDGCN-CL12-NEXT: i32 1, label %[[ACQUIRE:.*]]
42+
// AMDGCN-CL12-NEXT: i32 2, label %[[ACQUIRE]]
43+
// AMDGCN-CL12-NEXT: i32 3, label %[[RELEASE:.*]]
44+
// AMDGCN-CL12-NEXT: i32 4, label %[[ACQREL:.*]]
45+
// AMDGCN-CL12-NEXT: i32 5, label %[[SEQCST:.*]]
46+
// AMDGCN-CL12-NEXT: ]
47+
// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
48+
// AMDGCN-CL12-NEXT: ret void
49+
// AMDGCN-CL12: [[ACQUIRE]]:
50+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acquire
51+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
52+
// AMDGCN-CL12: [[RELEASE]]:
53+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
54+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
55+
// AMDGCN-CL12: [[ACQREL]]:
56+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") acq_rel
57+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
58+
// AMDGCN-CL12: [[SEQCST]]:
59+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") seq_cst
60+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
61+
//
62+
// AMDGCN-CL20-LABEL: define hidden void @fe1b(
63+
// AMDGCN-CL20-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
64+
// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
65+
// AMDGCN-CL20-NEXT: [[ORD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
66+
// AMDGCN-CL20-NEXT: store i32 [[ORD]], ptr addrspace(5) [[ORD_ADDR]], align 4
67+
// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[ORD_ADDR]], align 4
68+
// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
69+
// AMDGCN-CL20-NEXT: i32 1, label %[[ACQUIRE:.*]]
70+
// AMDGCN-CL20-NEXT: i32 2, label %[[ACQUIRE]]
71+
// AMDGCN-CL20-NEXT: i32 3, label %[[RELEASE:.*]]
72+
// AMDGCN-CL20-NEXT: i32 4, label %[[ACQREL:.*]]
73+
// AMDGCN-CL20-NEXT: i32 5, label %[[SEQCST:.*]]
74+
// AMDGCN-CL20-NEXT: ]
75+
// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
76+
// AMDGCN-CL20-NEXT: ret void
77+
// AMDGCN-CL20: [[ACQUIRE]]:
78+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acquire
79+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
80+
// AMDGCN-CL20: [[RELEASE]]:
81+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
82+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
83+
// AMDGCN-CL20: [[ACQREL]]:
84+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") acq_rel
85+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
86+
// AMDGCN-CL20: [[SEQCST]]:
87+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") seq_cst
88+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
6189
//
6290
// SPIRV-LABEL: define hidden spir_func void @fe1b(
6391
// SPIRV-SAME: i32 noundef [[ORD:%.*]]) #[[ATTR0]] {
@@ -119,37 +147,68 @@ void fe1b(int ord) {
119147
__scoped_atomic_thread_fence(ord, __MEMORY_SCOPE_WRKGRP);
120148
}
121149

122-
// AMDGCN-LABEL: define hidden void @fe1c(
123-
// AMDGCN-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
124-
// AMDGCN-NEXT: [[ENTRY:.*:]]
125-
// AMDGCN-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
126-
// AMDGCN-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
127-
// AMDGCN-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
128-
// AMDGCN-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
129-
// AMDGCN-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
130-
// AMDGCN-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
131-
// AMDGCN-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
132-
// AMDGCN-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
133-
// AMDGCN-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
134-
// AMDGCN-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
135-
// AMDGCN-NEXT: ]
136-
// AMDGCN: [[ATOMIC_SCOPE_CONTINUE]]:
137-
// AMDGCN-NEXT: ret void
138-
// AMDGCN: [[DEVICE_SCOPE]]:
139-
// AMDGCN-NEXT: fence syncscope("agent") release
140-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
141-
// AMDGCN: [[SYSTEM_SCOPE]]:
142-
// AMDGCN-NEXT: fence release
143-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
144-
// AMDGCN: [[WORKGROUP_SCOPE]]:
145-
// AMDGCN-NEXT: fence syncscope("workgroup") release
146-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
147-
// AMDGCN: [[WAVEFRONT_SCOPE]]:
148-
// AMDGCN-NEXT: fence syncscope("wavefront") release
149-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
150-
// AMDGCN: [[SINGLE_SCOPE]]:
151-
// AMDGCN-NEXT: fence syncscope("singlethread") release
152-
// AMDGCN-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
150+
// AMDGCN-CL12-LABEL: define hidden void @fe1c(
151+
// AMDGCN-CL12-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
152+
// AMDGCN-CL12-NEXT: [[ENTRY:.*:]]
153+
// AMDGCN-CL12-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
154+
// AMDGCN-CL12-NEXT: [[SCOPE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCOPE_ADDR]] to ptr
155+
// AMDGCN-CL12-NEXT: store i32 [[SCOPE]], ptr [[SCOPE_ADDR_ASCAST]], align 4
156+
// AMDGCN-CL12-NEXT: [[TMP0:%.*]] = load i32, ptr [[SCOPE_ADDR_ASCAST]], align 4
157+
// AMDGCN-CL12-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
158+
// AMDGCN-CL12-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
159+
// AMDGCN-CL12-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
160+
// AMDGCN-CL12-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
161+
// AMDGCN-CL12-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
162+
// AMDGCN-CL12-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
163+
// AMDGCN-CL12-NEXT: ]
164+
// AMDGCN-CL12: [[ATOMIC_SCOPE_CONTINUE]]:
165+
// AMDGCN-CL12-NEXT: ret void
166+
// AMDGCN-CL12: [[DEVICE_SCOPE]]:
167+
// AMDGCN-CL12-NEXT: fence syncscope("agent") release
168+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
169+
// AMDGCN-CL12: [[SYSTEM_SCOPE]]:
170+
// AMDGCN-CL12-NEXT: fence release
171+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
172+
// AMDGCN-CL12: [[WORKGROUP_SCOPE]]:
173+
// AMDGCN-CL12-NEXT: fence syncscope("workgroup") release
174+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
175+
// AMDGCN-CL12: [[WAVEFRONT_SCOPE]]:
176+
// AMDGCN-CL12-NEXT: fence syncscope("wavefront") release
177+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
178+
// AMDGCN-CL12: [[SINGLE_SCOPE]]:
179+
// AMDGCN-CL12-NEXT: fence syncscope("singlethread") release
180+
// AMDGCN-CL12-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
181+
//
182+
// AMDGCN-CL20-LABEL: define hidden void @fe1c(
183+
// AMDGCN-CL20-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {
184+
// AMDGCN-CL20-NEXT: [[ENTRY:.*:]]
185+
// AMDGCN-CL20-NEXT: [[SCOPE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
186+
// AMDGCN-CL20-NEXT: store i32 [[SCOPE]], ptr addrspace(5) [[SCOPE_ADDR]], align 4
187+
// AMDGCN-CL20-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[SCOPE_ADDR]], align 4
188+
// AMDGCN-CL20-NEXT: switch i32 [[TMP0]], label %[[ATOMIC_SCOPE_CONTINUE:.*]] [
189+
// AMDGCN-CL20-NEXT: i32 1, label %[[DEVICE_SCOPE:.*]]
190+
// AMDGCN-CL20-NEXT: i32 0, label %[[SYSTEM_SCOPE:.*]]
191+
// AMDGCN-CL20-NEXT: i32 2, label %[[WORKGROUP_SCOPE:.*]]
192+
// AMDGCN-CL20-NEXT: i32 3, label %[[WAVEFRONT_SCOPE:.*]]
193+
// AMDGCN-CL20-NEXT: i32 4, label %[[SINGLE_SCOPE:.*]]
194+
// AMDGCN-CL20-NEXT: ]
195+
// AMDGCN-CL20: [[ATOMIC_SCOPE_CONTINUE]]:
196+
// AMDGCN-CL20-NEXT: ret void
197+
// AMDGCN-CL20: [[DEVICE_SCOPE]]:
198+
// AMDGCN-CL20-NEXT: fence syncscope("agent") release
199+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
200+
// AMDGCN-CL20: [[SYSTEM_SCOPE]]:
201+
// AMDGCN-CL20-NEXT: fence release
202+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
203+
// AMDGCN-CL20: [[WORKGROUP_SCOPE]]:
204+
// AMDGCN-CL20-NEXT: fence syncscope("workgroup") release
205+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
206+
// AMDGCN-CL20: [[WAVEFRONT_SCOPE]]:
207+
// AMDGCN-CL20-NEXT: fence syncscope("wavefront") release
208+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
209+
// AMDGCN-CL20: [[SINGLE_SCOPE]]:
210+
// AMDGCN-CL20-NEXT: fence syncscope("singlethread") release
211+
// AMDGCN-CL20-NEXT: br label %[[ATOMIC_SCOPE_CONTINUE]]
153212
//
154213
// SPIRV-LABEL: define hidden spir_func void @fe1c(
155214
// SPIRV-SAME: i32 noundef [[SCOPE:%.*]]) #[[ATTR0]] {

0 commit comments

Comments
 (0)