Skip to content

Commit d533e86

Browse files
jdoerfertronlieb
authored andcommitted
[OpenMP] Introduce the KernelLaunchEnvironment as implicit argument (llvm#70401)
The KernelEnvironment is for compile time information about a kernel. It allows the compiler to feed information to the runtime. The KernelLaunchEnvironment is for dynamic information *per* kernel launch. It allows the rutime to feed information to the kernel that is not shared with other invocations of the kernel. The first use case is to replace the globals that synchronize teams reductions with per-launch versions. This allows concurrent teams reductions. More uses cases will follow, e.g., per launch memory pools. Fixes: llvm#70249 Change-Id: I06ce8c63cf5020be778e1a9e06053a1950dfb18e
1 parent a999d61 commit d533e86

File tree

196 files changed

+11858
-9779
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

196 files changed

+11858
-9779
lines changed

clang/lib/Sema/SemaOpenMP.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4250,12 +4250,15 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
42504250
getCurCapturedRegion()->TheCapturedDecl->addAttr(
42514251
AlwaysInlineAttr::CreateImplicit(
42524252
Context, {}, AlwaysInlineAttr::Keyword_forceinline));
4253-
Sema::CapturedParamNameType ParamsTarget[] = {
4254-
std::make_pair(StringRef(), QualType()) // __context with shared vars
4255-
};
4253+
SmallVector<Sema::CapturedParamNameType, 2> ParamsTarget;
4254+
if (getLangOpts().OpenMPIsTargetDevice)
4255+
ParamsTarget.push_back(std::make_pair(StringRef("dyn_ptr"), VoidPtrTy));
4256+
ParamsTarget.push_back(
4257+
std::make_pair(StringRef(), QualType())); // __context with shared vars;
42564258
// Start a captured region for 'target' with no implicit parameters.
42574259
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
4258-
ParamsTarget, /*OpenMPCaptureLevel=*/1);
4260+
ParamsTarget,
4261+
/*OpenMPCaptureLevel=*/1);
42594262
Sema::CapturedParamNameType ParamsTeamsOrParallel[] = {
42604263
std::make_pair(".global_tid.", KmpInt32PtrTy),
42614264
std::make_pair(".bound_tid.", KmpInt32PtrTy),
@@ -4294,8 +4297,13 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
42944297
getCurCapturedRegion()->TheCapturedDecl->addAttr(
42954298
AlwaysInlineAttr::CreateImplicit(
42964299
Context, {}, AlwaysInlineAttr::Keyword_forceinline));
4300+
SmallVector<Sema::CapturedParamNameType, 2> ParamsTarget;
4301+
if (getLangOpts().OpenMPIsTargetDevice)
4302+
ParamsTarget.push_back(std::make_pair(StringRef("dyn_ptr"), VoidPtrTy));
4303+
ParamsTarget.push_back(
4304+
std::make_pair(StringRef(), QualType())); // __context with shared vars;
42974305
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
4298-
std::make_pair(StringRef(), QualType()),
4306+
ParamsTarget,
42994307
/*OpenMPCaptureLevel=*/1);
43004308
break;
43014309
}
@@ -4502,9 +4510,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
45024510
getCurCapturedRegion()->TheCapturedDecl->addAttr(
45034511
AlwaysInlineAttr::CreateImplicit(
45044512
Context, {}, AlwaysInlineAttr::Keyword_forceinline));
4505-
Sema::CapturedParamNameType ParamsTarget[] = {
4506-
std::make_pair(StringRef(), QualType()) // __context with shared vars
4507-
};
4513+
SmallVector<Sema::CapturedParamNameType, 2> ParamsTarget;
4514+
if (getLangOpts().OpenMPIsTargetDevice)
4515+
ParamsTarget.push_back(std::make_pair(StringRef("dyn_ptr"), VoidPtrTy));
4516+
ParamsTarget.push_back(
4517+
std::make_pair(StringRef(), QualType())); // __context with shared vars;
45084518
// Start a captured region for 'target' with no implicit parameters.
45094519
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
45104520
ParamsTarget, /*OpenMPCaptureLevel=*/1);

clang/test/OpenMP/amdgcn_target_codegen.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,18 @@ int test_amdgcn_target_tid_threads_simd() {
2929

3030
#endif
3131
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14
32-
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
32+
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
3333
// CHECK-NEXT: entry:
34+
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3435
// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
3536
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
37+
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
3638
// CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
3739
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
40+
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
3841
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
3942
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
40-
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr))
43+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr), ptr [[DYN_PTR]])
4144
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
4245
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
4346
// CHECK: user_code.entry:
@@ -66,19 +69,22 @@ int test_amdgcn_target_tid_threads_simd() {
6669
//
6770
//
6871
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23
69-
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
72+
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
7073
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
7175
// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
7276
// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
7377
// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
7478
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
79+
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
7580
// CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
7681
// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
7782
// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
7883
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
84+
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
7985
// CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
8086
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
81-
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr))
87+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr), ptr [[DYN_PTR]])
8288
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
8389
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
8490
// CHECK: user_code.entry:

clang/test/OpenMP/amdgcn_target_device_vla.cpp

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,21 +97,24 @@ int main() {
9797

9898
#endif
9999
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12
100-
// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] {
100+
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] {
101101
// CHECK-NEXT: entry:
102+
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
102103
// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
103104
// CHECK-NEXT: [[N:%.*]] = alloca i32, align 4, addrspace(5)
104105
// CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
105106
// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
106107
// CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4, addrspace(5)
108+
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
107109
// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
108110
// CHECK-NEXT: [[N_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N]] to ptr
109111
// CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
110112
// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
111113
// CHECK-NEXT: [[I1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I1]] to ptr
114+
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
112115
// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
113116
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8
114-
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12_kernel_environment to ptr))
117+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo1v_l12_kernel_environment to ptr), ptr [[DYN_PTR]])
115118
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
116119
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
117120
// CHECK: user_code.entry:
@@ -174,8 +177,9 @@ int main() {
174177
//
175178
//
176179
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo2v_l30
177-
// CHECK-SAME: (i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR3:[0-9]+]] {
180+
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR3:[0-9]+]] {
178181
// CHECK-NEXT: entry:
182+
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
179183
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
180184
// CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
181185
// CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -190,6 +194,7 @@ int main() {
190194
// CHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8, addrspace(5)
191195
// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
192196
// CHECK-NEXT: [[J9:%.*]] = alloca i32, align 4, addrspace(5)
197+
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
193198
// CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
194199
// CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
195200
// CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
@@ -204,6 +209,7 @@ int main() {
204209
// CHECK-NEXT: [[__VLA_EXPR0_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__VLA_EXPR0]] to ptr
205210
// CHECK-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
206211
// CHECK-NEXT: [[J9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J9]] to ptr
212+
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
207213
// CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
208214
// CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
209215
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
@@ -318,26 +324,29 @@ int main() {
318324
//
319325
//
320326
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52
321-
// CHECK-SAME: (i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
327+
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
322328
// CHECK-NEXT: entry:
329+
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
323330
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
324331
// CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
325332
// CHECK-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
326333
// CHECK-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
327334
// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
328335
// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
336+
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
329337
// CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
330338
// CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
331339
// CHECK-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RESULT_ADDR]] to ptr
332340
// CHECK-NEXT: [[M_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_CASTED]] to ptr
333341
// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
334342
// CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
343+
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
335344
// CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
336345
// CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
337346
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
338347
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
339348
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
340-
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_kernel_environment to ptr))
349+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo3v_l52_kernel_environment to ptr), ptr [[DYN_PTR]])
341350
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
342351
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
343352
// CHECK: user_code.entry:
@@ -672,8 +681,9 @@ int main() {
672681
//
673682
//
674683
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76
675-
// CHECK-SAME: (i64 noundef [[M:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
684+
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[M:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]]) #[[ATTR0]] {
676685
// CHECK-NEXT: entry:
686+
// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
677687
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
678688
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
679689
// CHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
@@ -682,6 +692,7 @@ int main() {
682692
// CHECK-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
683693
// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
684694
// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
695+
// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
685696
// CHECK-NEXT: [[M_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[M_ADDR]] to ptr
686697
// CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
687698
// CHECK-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
@@ -690,13 +701,14 @@ int main() {
690701
// CHECK-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
691702
// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
692703
// CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
704+
// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
693705
// CHECK-NEXT: store i64 [[M]], ptr [[M_ADDR_ASCAST]], align 8
694706
// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
695707
// CHECK-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
696708
// CHECK-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
697709
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
698710
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 8
699-
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_kernel_environment to ptr))
711+
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4foo4v_l76_kernel_environment to ptr), ptr [[DYN_PTR]])
700712
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
701713
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
702714
// CHECK: user_code.entry:

clang/test/OpenMP/amdgcn_target_init_temp_alloca.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ int test_amdgcn_target_temp_alloca() {
1111

1212
int arr[N];
1313

14+
// CHECK: [[DYN_PTR_ADDR:%.+]] = alloca ptr, align 8, addrspace(5)
1415
// CHECK: [[VAR_ADDR:%.+]] = alloca ptr, align 8, addrspace(5)
1516
// CHECK-NEXT: [[VAR2_ADDR:%.+]] = alloca i32, align 4, addrspace(5)
17+
// CHECK-NEXT: [[DYN_PTR_ADDR_CAST:%.+]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
1618
// CHECK-NEXT: [[VAR_ADDR_CAST:%.+]] = addrspacecast ptr addrspace(5) [[VAR_ADDR]] to ptr
1719
// CHECK-NEXT: [[VAR2_ADDR_CAST:%.+]] = addrspacecast ptr addrspace(5) [[VAR2_ADDR]] to ptr
1820
// CHECK: store ptr [[VAR:%.+]], ptr [[VAR_ADDR_CAST]], align 8

0 commit comments

Comments
 (0)