Skip to content

Commit c5488c8

Browse files
committed
[OpenMP] Properly set static thread limit (w/o analysis)
We used to have two separate implementations to derive the number of threads used in a target region. This led us to sometimes miss out on user-provided thread bounds (num_threads, or thread_limit) when we looked for "constant default values". If we might miss out on the presence of those bounds, we cannot set the thread_limit statically since the runtime will try to honor user input rather than cap it at the "preferred default". This patch replaces the secondary implementation with the primary in a mode that will not emit code but just look for the presence, and potentially upper bounds, of thread-limiting clauses. The runtime test would not pass without this rewrite as we missed some clauses, set the static limit on the device to the preferred value, but then violated that value at runtime. Fixes: llvm#64845 Differential Revision: https://reviews.llvm.org/D158381
1 parent e7aed22 commit c5488c8

File tree

118 files changed

+6814
-6865
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+6814
-6865
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 210 additions & 339 deletions
Large diffs are not rendered by default.

clang/lib/CodeGen/CGOpenMPRuntime.h

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -640,18 +640,19 @@ class CGOpenMPRuntime {
640640
int32_t &DefaultVal);
641641
llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
642642
const OMPExecutableDirective &D);
643-
/// Emit the number of threads for a target directive. Inspect the
644-
/// thread_limit clause associated with a teams construct combined or closely
645-
/// nested with the target directive.
646-
///
647-
/// Emit the num_threads clause for directives such as 'target parallel' that
648-
/// have no associated teams construct.
649-
///
650-
/// Otherwise, return nullptr.
651-
const Expr *
652-
getNumThreadsExprForTargetDirective(CodeGenFunction &CGF,
653-
const OMPExecutableDirective &D,
654-
int32_t &DefaultVal);
643+
644+
/// Check for a number of threads upper bound constant value (stored in \p
645+
/// UpperBound), or expression (returned). If the value is conditional (via an
646+
/// if-clause), store the condition in \p CondExpr. Similarly, a potential
647+
/// thread limit expression is stored in \p ThreadLimitExpr. If \p
648+
/// UpperBoundOnly is true, no expression evaluation is performed.
649+
const Expr *getNumThreadsExprForTargetDirective(
650+
CodeGenFunction &CGF, const OMPExecutableDirective &D,
651+
uint32_t &UpperBound, bool UpperBoundOnly,
652+
llvm::Value **CondExpr = nullptr, const Expr **ThreadLimitExpr = nullptr);
653+
654+
/// Emit an expression that denotes the number of threads a target region
655+
/// shall use. Will generate "i32 0" to allow the runtime to choose.
655656
llvm::Value *
656657
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
657658
const OMPExecutableDirective &D);

clang/test/OpenMP/bug60602.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) {
347347
//
348348
//
349349
// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18kernel_within_loopPiS_ii_l13
350-
// CHECK-SAME: (i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR2]] {
350+
// CHECK-SAME: (i64 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR5:[0-9]+]] {
351351
// CHECK-NEXT: entry:
352352
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
353353
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
@@ -571,7 +571,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) {
571571
//
572572
//
573573
// CHECK-LABEL: define internal void @.omp_offloading.requires_reg
574-
// CHECK-SAME: () #[[ATTR5:[0-9]+]] {
574+
// CHECK-SAME: () #[[ATTR6:[0-9]+]] {
575575
// CHECK-NEXT: entry:
576576
// CHECK-NEXT: call void @__tgt_register_requires(i64 1)
577577
// CHECK-NEXT: ret void

clang/test/OpenMP/distribute_codegen.cpp

Lines changed: 32 additions & 32 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_firstprivate_codegen.cpp

Lines changed: 36 additions & 36 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_lastprivate_codegen.cpp

Lines changed: 36 additions & 36 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_codegen.cpp

Lines changed: 118 additions & 118 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp

Lines changed: 50 additions & 50 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp

Lines changed: 31 additions & 31 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp

Lines changed: 50 additions & 50 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp

Lines changed: 152 additions & 152 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp

Lines changed: 50 additions & 50 deletions
Large diffs are not rendered by default.

clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ int main() {
9292
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
9393
// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
9494
// CHECK1: omp_offload.failed:
95-
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37() #[[ATTR2:[0-9]+]]
95+
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37() #[[ATTR3:[0-9]+]]
9696
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
9797
// CHECK1: omp_offload.cont:
9898
// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS2]], i32 0, i32 0
@@ -125,7 +125,7 @@ int main() {
125125
// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
126126
// CHECK1-NEXT: br i1 [[TMP29]], label [[OMP_OFFLOAD_FAILED3:%.*]], label [[OMP_OFFLOAD_CONT4:%.*]]
127127
// CHECK1: omp_offload.failed3:
128-
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41() #[[ATTR2]]
128+
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41() #[[ATTR3]]
129129
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT4]]
130130
// CHECK1: omp_offload.cont4:
131131
// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v()
@@ -140,7 +140,7 @@ int main() {
140140
//
141141
//
142142
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined
143-
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
143+
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
144144
// CHECK1-NEXT: entry:
145145
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
146146
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -201,7 +201,7 @@ int main() {
201201
//
202202
//
203203
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37.omp_outlined.omp_outlined
204-
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
204+
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] {
205205
// CHECK1-NEXT: entry:
206206
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
207207
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -278,7 +278,7 @@ int main() {
278278
//
279279
//
280280
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined
281-
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
281+
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
282282
// CHECK1-NEXT: entry:
283283
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
284284
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -339,7 +339,7 @@ int main() {
339339
//
340340
//
341341
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.omp_outlined.omp_outlined
342-
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
342+
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] {
343343
// CHECK1-NEXT: entry:
344344
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
345345
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -409,7 +409,7 @@ int main() {
409409
//
410410
//
411411
// CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
412-
// CHECK1-SAME: () #[[ATTR3:[0-9]+]] comdat {
412+
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] comdat {
413413
// CHECK1-NEXT: entry:
414414
// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
415415
// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
@@ -443,7 +443,7 @@ int main() {
443443
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
444444
// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
445445
// CHECK1: omp_offload.failed:
446-
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29() #[[ATTR2]]
446+
// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29() #[[ATTR3]]
447447
// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
448448
// CHECK1: omp_offload.cont:
449449
// CHECK1-NEXT: ret i32 0
@@ -457,7 +457,7 @@ int main() {
457457
//
458458
//
459459
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29.omp_outlined
460-
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
460+
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] {
461461
// CHECK1-NEXT: entry:
462462
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
463463
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -518,7 +518,7 @@ int main() {
518518
//
519519
//
520520
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29.omp_outlined.omp_outlined
521-
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
521+
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] {
522522
// CHECK1-NEXT: entry:
523523
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
524524
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -588,7 +588,7 @@ int main() {
588588
//
589589
//
590590
// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
591-
// CHECK1-SAME: () #[[ATTR4:[0-9]+]] {
591+
// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
592592
// CHECK1-NEXT: entry:
593593
// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
594594
// CHECK1-NEXT: ret void

0 commit comments

Comments
 (0)