Skip to content

Commit 2403865

Browse files
committed
[OpenACC][CIR] implement 'collapse' lowering for combined constructs
Another trivial implementation. The 'collapse' clause takes a constant value, so lowering it doesn't require inserting any instructions; for combined constructs the clause is simply applied to the associated loop operation.
1 parent 9be4d64 commit 2403865

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -414,10 +414,10 @@ class OpenACCClauseCIREmitter final
414414
value = value.sextOrTrunc(64);
415415
operation.setCollapseForDeviceTypes(builder.getContext(),
416416
lastDeviceTypeValues, value);
417+
} else if constexpr (isCombinedType<OpTy>) {
418+
applyToLoopOp(clause);
417419
} else {
418-
// TODO: When we've implemented this for everything, switch this to an
419-
// unreachable. Combined constructs remain.
420-
return clauseNotImplemented(clause);
420+
llvm_unreachable("Unknown construct kind in VisitCollapseClause");
421421
}
422422
}
423423

clang/test/CIR/CodeGenOpenACC/combined.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,4 +134,46 @@ extern "C" void acc_combined(int N) {
134134
// CHECK: acc.terminator
135135
// CHECK-NEXT: } loc
136136

137+
#pragma acc parallel loop collapse(1) device_type(radeon)
138+
for(unsigned I = 0; I < N; ++I)
139+
for(unsigned J = 0; J < N; ++J)
140+
for(unsigned K = 0; K < N; ++K);
141+
// CHECK: acc.parallel combined(loop) {
142+
// CHECK: acc.loop combined(parallel) {
143+
// CHECK: acc.yield
144+
// CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
145+
// CHECK: acc.yield
146+
// CHECK-NEXT: } loc
147+
148+
#pragma acc serial loop collapse(1) device_type(radeon) collapse (2)
149+
for(unsigned I = 0; I < N; ++I)
150+
for(unsigned J = 0; J < N; ++J)
151+
for(unsigned K = 0; K < N; ++K);
152+
// CHECK: acc.serial combined(loop) {
153+
// CHECK: acc.loop combined(serial) {
154+
// CHECK: acc.yield
155+
// CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
156+
// CHECK: acc.yield
157+
// CHECK-NEXT: } loc
158+
159+
#pragma acc kernels loop collapse(1) device_type(radeon, nvidia) collapse (2)
160+
for(unsigned I = 0; I < N; ++I)
161+
for(unsigned J = 0; J < N; ++J)
162+
for(unsigned K = 0; K < N; ++K);
163+
// CHECK: acc.kernels combined(loop) {
164+
// CHECK: acc.loop combined(kernels) {
165+
// CHECK: acc.yield
166+
// CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
167+
// CHECK: acc.terminator
168+
// CHECK-NEXT: } loc
169+
#pragma acc parallel loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
170+
for(unsigned I = 0; I < N; ++I)
171+
for(unsigned J = 0; J < N; ++J)
172+
for(unsigned K = 0; K < N; ++K);
173+
// CHECK: acc.parallel combined(loop) {
174+
// CHECK: acc.loop combined(parallel) {
175+
// CHECK: acc.yield
176+
// CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
177+
// CHECK: acc.yield
178+
// CHECK-NEXT: } loc
137179
}

0 commit comments

Comments
 (0)