Skip to content

Commit 29f7392

Browse files
authored
[flang][OpenMP] Rewrite standalone loop (without bind) directives to simd (#122632)
Extends conversion support for `loop` directives. This PR handles standalone `loop` constructs that do not have a `bind` clause attached by rewriting them to equivalent `simd` constructs. The reasoning behind that decision is documented in the rewrite function itself.
1 parent 9ca1323 commit 29f7392

File tree

3 files changed

+123
-22
lines changed

3 files changed

+123
-22
lines changed

flang/lib/Optimizer/OpenMP/GenericLoopConversion.cpp

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,39 @@ class GenericLoopConversionPattern
3030
: public mlir::OpConversionPattern<mlir::omp::LoopOp> {
3131
public:
3232
enum class GenericLoopCombinedInfo {
33-
None,
33+
Standalone,
3434
TargetTeamsLoop,
3535
TargetParallelLoop
3636
};
3737

3838
using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
3939

40+
explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
41+
: mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
42+
// Enable rewrite recursion to make sure nested `loop` directives are
43+
// handled.
44+
this->setHasBoundedRewriteRecursion(true);
45+
}
46+
4047
mlir::LogicalResult
4148
matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
4249
mlir::ConversionPatternRewriter &rewriter) const override {
4350
assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));
4451

45-
rewriteToDistributeParallelDo(loopOp, rewriter);
52+
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
53+
54+
switch (combinedInfo) {
55+
case GenericLoopCombinedInfo::Standalone:
56+
rewriteToSimdLoop(loopOp, rewriter);
57+
break;
58+
case GenericLoopCombinedInfo::TargetParallelLoop:
59+
llvm_unreachable("not yet implemented: `parallel loop` directive");
60+
break;
61+
case GenericLoopCombinedInfo::TargetTeamsLoop:
62+
rewriteToDistributeParallelDo(loopOp, rewriter);
63+
break;
64+
}
65+
4666
rewriter.eraseOp(loopOp);
4767
return mlir::success();
4868
}
@@ -52,9 +72,8 @@ class GenericLoopConversionPattern
5272
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
5373

5474
switch (combinedInfo) {
55-
case GenericLoopCombinedInfo::None:
56-
return loopOp.emitError(
57-
"not yet implemented: Standalone `omp loop` directive");
75+
case GenericLoopCombinedInfo::Standalone:
76+
break;
5877
case GenericLoopCombinedInfo::TargetParallelLoop:
5978
return loopOp.emitError(
6079
"not yet implemented: Combined `omp target parallel loop` directive");
@@ -86,7 +105,7 @@ class GenericLoopConversionPattern
86105
static GenericLoopCombinedInfo
87106
findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
88107
mlir::Operation *parentOp = loopOp->getParentOp();
89-
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None;
108+
GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
90109

91110
if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
92111
if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
@@ -100,6 +119,62 @@ class GenericLoopConversionPattern
100119
return result;
101120
}
102121

122+
/// Rewrites standalone `loop` directives to equivalent `simd` constructs.
123+
/// The reasoning behind this decision is that according to the spec (version
124+
/// 5.2, section 11.7.1):
125+
///
126+
/// "If the bind clause is not specified on a construct for which it may be
127+
/// specified and the construct is closely nested inside a teams or parallel
128+
/// construct, the effect is as if binding is teams or parallel. If none of
129+
/// those conditions hold, the binding region is not defined."
130+
///
131+
/// which means that standalone `loop` directives have undefined binding
132+
/// region. Moreover, the spec says (in the next paragraph):
133+
///
134+
/// "The specified binding region determines the binding thread set.
135+
/// Specifically, if the binding region is a teams region, then the binding
136+
/// thread set is the set of initial threads that are executing that region
137+
/// while if the binding region is a parallel region, then the binding thread
138+
/// set is the team of threads that are executing that region. If the binding
139+
/// region is not defined, then the binding thread set is the encountering
140+
/// thread."
141+
///
142+
/// which means that the binding thread set for a standalone `loop` directive
143+
/// is only the encountering thread.
144+
///
145+
/// Since the encountering thread is the binding thread (set) for a
146+
/// standalone `loop` directive, the best we can do in such case is to "simd"
147+
/// the directive.
148+
void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
149+
mlir::ConversionPatternRewriter &rewriter) const {
150+
loopOp.emitWarning("Detected standalone OpenMP `loop` directive, the "
151+
"associated loop will be rewritten to `simd`.");
152+
mlir::omp::SimdOperands simdClauseOps;
153+
simdClauseOps.privateVars = loopOp.getPrivateVars();
154+
155+
auto privateSyms = loopOp.getPrivateSyms();
156+
if (privateSyms)
157+
simdClauseOps.privateSyms.assign(privateSyms->begin(),
158+
privateSyms->end());
159+
160+
Fortran::common::openmp::EntryBlockArgs simdArgs;
161+
simdArgs.priv.vars = simdClauseOps.privateVars;
162+
163+
auto simdOp =
164+
rewriter.create<mlir::omp::SimdOp>(loopOp.getLoc(), simdClauseOps);
165+
mlir::Block *simdBlock =
166+
genEntryBlock(rewriter, simdArgs, simdOp.getRegion());
167+
168+
mlir::IRMapping mapper;
169+
mlir::Block &loopBlock = *loopOp.getRegion().begin();
170+
171+
for (auto [loopOpArg, simdopArg] :
172+
llvm::zip_equal(loopBlock.getArguments(), simdBlock->getArguments()))
173+
mapper.map(loopOpArg, simdopArg);
174+
175+
rewriter.clone(*loopOp.begin(), mapper);
176+
}
177+
103178
void rewriteToDistributeParallelDo(
104179
mlir::omp::LoopOp loopOp,
105180
mlir::ConversionPatternRewriter &rewriter) const {

flang/test/Lower/OpenMP/loop-directive.f90

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
subroutine test_no_clauses()
1212
integer :: i, j, dummy = 1
1313

14-
! CHECK: omp.loop private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
14+
! CHECK: omp.simd private(@[[I_PRIV]] %{{.*}}#0 -> %[[ARG:.*]] : !fir.ref<i32>) {
1515
! CHECK-NEXT: omp.loop_nest (%[[IV:.*]]) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
1616
! CHECK: %[[ARG_DECL:.*]]:2 = hlfir.declare %[[ARG]]
1717
! CHECK: fir.store %[[IV]] to %[[ARG_DECL]]#1 : !fir.ref<i32>
@@ -27,7 +27,7 @@ subroutine test_no_clauses()
2727
! CHECK-LABEL: func.func @_QPtest_collapse
2828
subroutine test_collapse()
2929
integer :: i, j, dummy = 1
30-
! CHECK: omp.loop private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
30+
! CHECK: omp.simd private(@{{.*}} %{{.*}}#0 -> %{{.*}}, @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
3131
! CHECK-NEXT: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 {{.*}} {
3232
! CHECK: }
3333
! CHECK: }
@@ -43,7 +43,7 @@ subroutine test_collapse()
4343
! CHECK-LABEL: func.func @_QPtest_private
4444
subroutine test_private()
4545
integer :: i, dummy = 1
46-
! CHECK: omp.loop private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
46+
! CHECK: omp.simd private(@[[DUMMY_PRIV]] %{{.*}}#0 -> %[[DUMMY_ARG:.*]], @{{.*}} %{{.*}}#0 -> %{{.*}} : {{.*}}) {
4747
! CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%{{.*}}) to (%{{.*}}) {{.*}} {
4848
! CHECK: %[[DUMMY_DECL:.*]]:2 = hlfir.declare %[[DUMMY_ARG]] {uniq_name = "_QFtest_privateEdummy"}
4949
! CHECK: %{{.*}} = fir.load %[[DUMMY_DECL]]#0
@@ -100,3 +100,42 @@ subroutine test_bind()
100100
end do
101101
!$omp end loop
102102
end subroutine
103+
104+
! CHECK-LABEL: func.func @_QPtest_nested_directives
105+
subroutine test_nested_directives
106+
implicit none
107+
integer, parameter :: N = 100000
108+
integer a(N), b(N), c(N)
109+
integer j,i, num, flag;
110+
num = N
111+
112+
! CHECK: omp.teams {
113+
114+
! Verify the first `loop` directive was combined with `target teams` into
115+
! `target teams distribute parallel do`.
116+
! CHECK: omp.parallel {{.*}} {
117+
! CHECK: omp.distribute {
118+
! CHECK: omp.wsloop {
119+
! CHECK: omp.loop_nest {{.*}} {
120+
121+
! Verify the second `loop` directive was rewritten to `simd`.
122+
! CHECK: omp.simd {{.*}} {
123+
! CHECK: omp.loop_nest {{.*}} {
124+
! CHECK: }
125+
! CHECK: }
126+
127+
! CHECK: }
128+
! CHECK: } {omp.composite}
129+
! CHECK: } {omp.composite}
130+
! CHECK: } {omp.composite}
131+
! CHECK: }
132+
!$omp target teams map(to: a,b) map(from: c)
133+
!$omp loop
134+
do j=1,1000
135+
!$omp loop
136+
do i=1,N
137+
c(i) = a(i) * b(i)
138+
end do
139+
end do
140+
!$omp end target teams
141+
end subroutine

flang/test/Transforms/generic-loop-rewriting-todo.mlir

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,5 @@
11
// RUN: fir-opt --omp-generic-loop-conversion -verify-diagnostics %s
22

3-
func.func @_QPtarget_loop() {
4-
%c0 = arith.constant 0 : i32
5-
%c10 = arith.constant 10 : i32
6-
%c1 = arith.constant 1 : i32
7-
// expected-error@below {{not yet implemented: Standalone `omp loop` directive}}
8-
omp.loop {
9-
omp.loop_nest (%arg3) : i32 = (%c0) to (%c10) inclusive step (%c1) {
10-
omp.yield
11-
}
12-
}
13-
return
14-
}
15-
163
func.func @_QPtarget_parallel_loop() {
174
omp.target {
185
omp.parallel {

0 commit comments

Comments
 (0)