Skip to content

Commit 881f6de

Browse files
authored
[OpenACC][CIR] Lower 'wait' clause for compute/data constructs (#137359)
The 'wait' clause is a bit complicated, and is laid out awkwardly in the IR addative functions, so this patch has to do a little bit of work to do that (mostly the 'devnum' work). Otherwise, this is very similar to how num_gangs works, with the additional complexity of the 'empty' wait being represented differently as well, but this is similar to how 'async' and a few others work as well.
1 parent 3c4dff3 commit 881f6de

File tree

8 files changed

+500
-2
lines changed

8 files changed

+500
-2
lines changed

clang/include/clang/AST/OpenACCClause.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,9 @@ class OpenACCWaitClause final
565565
llvm::ArrayRef<Expr *> getQueueIdExprs() const {
566566
return OpenACCClauseWithExprs::getExprs().drop_front();
567567
}
568+
// If this is a plain `wait` (no parens) this returns 'false'. Else Sema/Parse
569+
// ensures we have at least one QueueId expression.
570+
bool hasExprs() const { return getLParenLoc().isValid(); }
568571
};
569572

570573
class OpenACCNumGangsClause final

clang/lib/AST/OpenACCClause.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ void OpenACCClausePrinter::VisitReductionClause(
865865

866866
void OpenACCClausePrinter::VisitWaitClause(const OpenACCWaitClause &C) {
867867
OS << "wait";
868-
if (!C.getLParenLoc().isInvalid()) {
868+
if (C.hasExprs()) {
869869
OS << "(";
870870
if (C.hasDevNumExpr()) {
871871
OS << "devnum: ";

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,53 @@ class OpenACCClauseCIREmitter final
386386
}
387387
}
388388

389+
void VisitWaitClause(const OpenACCWaitClause &clause) {
390+
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp, DataOp>) {
391+
if (!clause.hasExprs()) {
392+
operation.setWaitOnlyAttr(
393+
handleDeviceTypeAffectedClause(operation.getWaitOnlyAttr()));
394+
} else {
395+
llvm::SmallVector<mlir::Value> values;
396+
397+
if (clause.hasDevNumExpr())
398+
values.push_back(createIntExpr(clause.getDevNumExpr()));
399+
for (const Expr *E : clause.getQueueIdExprs())
400+
values.push_back(createIntExpr(E));
401+
402+
llvm::SmallVector<int32_t> segments;
403+
if (operation.getWaitOperandsSegments())
404+
llvm::copy(*operation.getWaitOperandsSegments(),
405+
std::back_inserter(segments));
406+
407+
unsigned beforeSegmentSize = segments.size();
408+
409+
mlir::MutableOperandRange range = operation.getWaitOperandsMutable();
410+
operation.setWaitOperandsDeviceTypeAttr(handleDeviceTypeAffectedClause(
411+
operation.getWaitOperandsDeviceTypeAttr(), values, range,
412+
segments));
413+
operation.setWaitOperandsSegments(segments);
414+
415+
// In addition to having to set the 'segments', wait also has a list of
416+
// bool attributes whether it is annotated with 'devnum'. We can use
417+
// our knowledge of how much the 'segments' array grew to determine how
418+
// many we need to add.
419+
llvm::SmallVector<bool> hasDevNums;
420+
if (operation.getHasWaitDevnumAttr())
421+
for (mlir::Attribute A : operation.getHasWaitDevnumAttr())
422+
hasDevNums.push_back(cast<mlir::BoolAttr>(A).getValue());
423+
424+
hasDevNums.insert(hasDevNums.end(), segments.size() - beforeSegmentSize,
425+
clause.hasDevNumExpr());
426+
427+
operation.setHasWaitDevnumAttr(builder.getBoolArrayAttr(hasDevNums));
428+
}
429+
} else {
430+
// TODO: When we've implemented this for everything, switch this to an
431+
// unreachable. Enter data, exit data, update, Combined constructs remain.
432+
return clauseNotImplemented(clause);
433+
}
434+
}
435+
389436
void VisitDefaultAsyncClause(const OpenACCDefaultAsyncClause &clause) {
390437
if constexpr (isOneOfTypes<OpTy, SetOp>) {
391438
operation.getDefaultAsyncMutable().append(

clang/lib/Sema/TreeTransform.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12228,7 +12228,7 @@ void OpenACCClauseTransform<Derived>::VisitVectorClause(
1222812228
template <typename Derived>
1222912229
void OpenACCClauseTransform<Derived>::VisitWaitClause(
1223012230
const OpenACCWaitClause &C) {
12231-
if (!C.getLParenLoc().isInvalid()) {
12231+
if (C.hasExprs()) {
1223212232
Expr *DevNumExpr = nullptr;
1223312233
llvm::SmallVector<Expr *> InstantiatedQueueIdExprs;
1223412234

clang/test/CIR/CodeGenOpenACC/data.c

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,117 @@ void acc_data(int cond) {
109109
// CHECK-NEXT: acc.terminator
110110
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}
111111

112+
#pragma acc data default(none) wait
113+
{}
114+
// CHECK-NEXT: acc.data wait {
115+
// CHECK-NEXT: acc.terminator
116+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
117+
118+
#pragma acc data default(none) wait device_type(nvidia) wait
119+
{}
120+
// CHECK-NEXT: acc.data wait([#acc.device_type<none>, #acc.device_type<nvidia>]) {
121+
// CHECK-NEXT: acc.terminator
122+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
123+
124+
#pragma acc data default(none) wait(1) device_type(nvidia) wait
125+
{}
126+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
127+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
128+
// CHECK-NEXT: acc.data wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) {
129+
// CHECK-NEXT: acc.terminator
130+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
131+
132+
#pragma acc data default(none) wait device_type(nvidia) wait(1)
133+
{}
134+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
135+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
136+
// CHECK-NEXT: acc.data wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) {
137+
// CHECK-NEXT: acc.terminator
138+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
139+
140+
#pragma acc data default(none) wait(1) device_type(nvidia) wait(1)
141+
{}
142+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
143+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
144+
// CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
145+
// CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32
146+
// CHECK-NEXT: acc.data wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
147+
// CHECK-NEXT: acc.terminator
148+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
149+
150+
#pragma acc data default(none) wait(devnum: cond : 1)
151+
{}
152+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
153+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
154+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
155+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
156+
// CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
157+
// CHECK-NEXT: acc.terminator
158+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
159+
160+
#pragma acc data default(none) wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1)
161+
{}
162+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
163+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
164+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
165+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
166+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
167+
// CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
168+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
169+
// CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
170+
// CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
171+
// CHECK-NEXT: acc.terminator
172+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
173+
174+
#pragma acc data default(none) wait(devnum: cond : 1, 2)
175+
{}
176+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
177+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
178+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
179+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
180+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
181+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
182+
// CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
183+
// CHECK-NEXT: acc.terminator
184+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
185+
186+
#pragma acc data default(none) wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2)
187+
{}
188+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
189+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
190+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
191+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
192+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
193+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
194+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
195+
// CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
196+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
197+
// CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
198+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
199+
// CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
200+
// CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) {
201+
// CHECK-NEXT: acc.terminator
202+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
203+
204+
#pragma acc data default(none) wait(cond, 1)
205+
{}
206+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
207+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
208+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
209+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
210+
// CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
211+
// CHECK-NEXT: acc.terminator
212+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
213+
214+
#pragma acc data default(none) wait(queues: cond, 1) device_type(radeon)
215+
{}
216+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
217+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
218+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
219+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
220+
// CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
221+
// CHECK-NEXT: acc.terminator
222+
// CHECK-NEXT: attributes {defaultAttr = #acc<defaultvalue none>}
223+
112224
// CHECK-NEXT: cir.return
113225
}

clang/test/CIR/CodeGenOpenACC/kernels.c

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,5 +302,117 @@ void acc_kernels(int cond) {
302302
// CHECK-NEXT: acc.terminator
303303
// CHECK-NEXT: } loc
304304

305+
#pragma acc kernels wait
306+
{}
307+
// CHECK-NEXT: acc.kernels wait {
308+
// CHECK-NEXT: acc.terminator
309+
// CHECK-NEXT: } loc
310+
311+
#pragma acc kernels wait device_type(nvidia) wait
312+
{}
313+
// CHECK-NEXT: acc.kernels wait([#acc.device_type<none>, #acc.device_type<nvidia>]) {
314+
// CHECK-NEXT: acc.terminator
315+
// CHECK-NEXT: } loc
316+
317+
#pragma acc kernels wait(1) device_type(nvidia) wait
318+
{}
319+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
320+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
321+
// CHECK-NEXT: acc.kernels wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) {
322+
// CHECK-NEXT: acc.terminator
323+
// CHECK-NEXT: } loc
324+
325+
#pragma acc kernels wait device_type(nvidia) wait(1)
326+
{}
327+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
328+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
329+
// CHECK-NEXT: acc.kernels wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) {
330+
// CHECK-NEXT: acc.terminator
331+
// CHECK-NEXT: } loc
332+
333+
#pragma acc kernels wait(1) device_type(nvidia) wait(1)
334+
{}
335+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
336+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
337+
// CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i
338+
// CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32
339+
// CHECK-NEXT: acc.kernels wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
340+
// CHECK-NEXT: acc.terminator
341+
// CHECK-NEXT: } loc
342+
343+
#pragma acc kernels wait(devnum: cond : 1)
344+
{}
345+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
346+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
347+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
348+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
349+
// CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
350+
// CHECK-NEXT: acc.terminator
351+
// CHECK-NEXT: } loc
352+
353+
#pragma acc kernels wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1)
354+
{}
355+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
356+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
357+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
358+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
359+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
360+
// CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
361+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
362+
// CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
363+
// CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) {
364+
// CHECK-NEXT: acc.terminator
365+
// CHECK-NEXT: } loc
366+
367+
#pragma acc kernels wait(devnum: cond : 1, 2)
368+
{}
369+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
370+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
371+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
372+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
373+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
374+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
375+
// CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) {
376+
// CHECK-NEXT: acc.terminator
377+
// CHECK-NEXT: } loc
378+
379+
#pragma acc kernels wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2)
380+
{}
381+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
382+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
383+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
384+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
385+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
386+
// CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
387+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
388+
// CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
389+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
390+
// CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
391+
// CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
392+
// CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
393+
// CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) {
394+
// CHECK-NEXT: acc.terminator
395+
// CHECK-NEXT: } loc
396+
397+
#pragma acc kernels wait(cond, 1)
398+
{}
399+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
400+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
401+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
402+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
403+
// CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
404+
// CHECK-NEXT: acc.terminator
405+
// CHECK-NEXT: } loc
406+
407+
#pragma acc kernels wait(queues: cond, 1) device_type(radeon)
408+
{}
409+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
410+
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
411+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
412+
// CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
413+
// CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) {
414+
// CHECK-NEXT: acc.terminator
415+
// CHECK-NEXT: } loc
416+
305417
// CHECK-NEXT: cir.return
306418
}

0 commit comments

Comments
 (0)