Skip to content

Commit c81b430

Browse files
authored
[flang][cuda] Fix lowering of cuf kernel with unstructured nested construct (#107149)
Lowering was crashing when a cuf kernel contained an unstructured construct. Blocks created by the PFT need to be re-created inside the operation, as is done for OpenACC constructs.
1 parent fe454b2 commit c81b430

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "flang/Lower/Bridge.h"
14+
#include "DirectivesCommon.h"
1415
#include "flang/Common/Version.h"
1516
#include "flang/Lower/Allocatable.h"
1617
#include "flang/Lower/CallInterface.h"
@@ -2999,14 +3000,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
29993000
mlir::Block &b = op.getRegion().back();
30003001
builder->setInsertionPointToStart(&b);
30013002

3003+
Fortran::lower::pft::Evaluation *crtEval = &getEval();
3004+
if (crtEval->lowerAsUnstructured())
3005+
Fortran::lower::createEmptyRegionBlocks<fir::FirEndOp>(
3006+
*builder, crtEval->getNestedEvaluations());
3007+
builder->setInsertionPointToStart(&b);
3008+
30023009
for (auto [arg, value] : llvm::zip(
30033010
op.getLoopRegions().front()->front().getArguments(), ivValues)) {
30043011
mlir::Value convArg =
30053012
builder->createConvert(loc, fir::unwrapRefType(value.getType()), arg);
30063013
builder->create<fir::StoreOp>(loc, convArg, value);
30073014
}
30083015

3009-
Fortran::lower::pft::Evaluation *crtEval = &getEval();
30103016
if (crtEval->lowerAsStructured()) {
30113017
crtEval = &crtEval->getFirstNestedEvaluation();
30123018
for (int64_t i = 1; i < nestedLoops; i++)

flang/test/Lower/CUDA/cuda-kernel-loop-directive.cuf

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,23 @@ end
7878
! CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]]#0 : !fir.ref<i64>
7979
! CHECK: %[[STREAM_I32:.*]] = fir.convert %[[STREAM_LOAD]] : (i64) -> i32
8080
! CHECK: cuf.kernel<<<*, *, stream = %[[STREAM_I32]]>>>
81+
82+
83+
! Test lowering with unstructured construct inside.
84+
subroutine sub2(m,a,b)
85+
integer :: m
86+
real, device :: a(m,m), b(m)
87+
integer :: i,j
88+
!$cuf kernel do<<<*,*>>>
89+
90+
do j = 1, m
91+
i = 1
92+
do while (a(i,j).eq.0)
93+
i = i + 1
94+
end do
95+
b(j) = i
96+
end do
97+
end subroutine
98+
99+
! CHECK-LABEL: func.func @_QPsub2
100+
! CHECK: cuf.kernel

0 commit comments

Comments
 (0)