@@ -30,19 +30,39 @@ class GenericLoopConversionPattern
30
30
: public mlir::OpConversionPattern<mlir::omp::LoopOp> {
31
31
public:
32
32
/// Classification of an `omp.loop` op, as computed by
/// `findGenericLoopCombineInfo`, that selects which rewrite is applied to it.
enum class GenericLoopCombinedInfo {
  /// Default classification when no combined form is detected; such ops are
  /// rewritten by `rewriteToSimdLoop`.
  Standalone,
  /// Ops with this classification are rewritten by
  /// `rewriteToDistributeParallelDo`.
  TargetTeamsLoop,
  /// Combined `omp target parallel loop`; conversion is not yet implemented.
  TargetParallelLoop
};
37
37
38
38
using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
39
39
40
+ explicit GenericLoopConversionPattern (mlir::MLIRContext *ctx)
41
+ : mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
42
+ // Enable rewrite recursion to make sure nested `loop` directives are
43
+ // handled.
44
+ this ->setHasBoundedRewriteRecursion (true );
45
+ }
46
+
40
47
/// Rewrites one `omp.loop` op and erases it.
///
/// The rewrite is chosen from the op's classification (see
/// `findGenericLoopCombineInfo`):
///  - `Standalone`         -> `rewriteToSimdLoop`
///  - `TargetTeamsLoop`    -> `rewriteToDistributeParallelDo`
///  - `TargetParallelLoop` -> not implemented; reaching it is a programming
///    error.
///
/// \param loopOp   the `omp.loop` op to convert.
/// \param adaptor  unused.
/// \param rewriter rewriter used to create the replacement ops and to erase
///                 `loopOp`.
/// \returns `mlir::success()` unconditionally.
mlir::LogicalResult
matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
                mlir::ConversionPatternRewriter &rewriter) const override {
  // Unsupported forms are expected to have been rejected before this pattern
  // runs (presumably via the pass's legality setup -- confirm against the
  // caller); this is only verified in assert-enabled builds.
  assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));

  switch (findGenericLoopCombineInfo(loopOp)) {
  case GenericLoopCombinedInfo::Standalone:
    rewriteToSimdLoop(loopOp, rewriter);
    break;
  case GenericLoopCombinedInfo::TargetParallelLoop:
    llvm_unreachable("not yet implemented: `parallel loop` directive");
    break;
  case GenericLoopCombinedInfo::TargetTeamsLoop:
    rewriteToDistributeParallelDo(loopOp, rewriter);
    break;
  }

  // The replacement has been created above; drop the original op.
  rewriter.eraseOp(loopOp);
  return mlir::success();
}
@@ -52,9 +72,8 @@ class GenericLoopConversionPattern
52
72
GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo (loopOp);
53
73
54
74
switch (combinedInfo) {
55
- case GenericLoopCombinedInfo::None:
56
- return loopOp.emitError (
57
- " not yet implemented: Standalone `omp loop` directive" );
75
+ case GenericLoopCombinedInfo::Standalone:
76
+ break ;
58
77
case GenericLoopCombinedInfo::TargetParallelLoop:
59
78
return loopOp.emitError (
60
79
" not yet implemented: Combined `omp target parallel loop` directive" );
@@ -86,7 +105,7 @@ class GenericLoopConversionPattern
86
105
static GenericLoopCombinedInfo
87
106
findGenericLoopCombineInfo (mlir::omp::LoopOp loopOp) {
88
107
mlir::Operation *parentOp = loopOp->getParentOp ();
89
- GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None ;
108
+ GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone ;
90
109
91
110
if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
92
111
if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp ()))
@@ -100,6 +119,62 @@ class GenericLoopConversionPattern
100
119
return result;
101
120
}
102
121
122
+ // / Rewrites standalone `loop` directives to equivalent `simd` constructs.
123
+ // / The reasoning behind this decision is that according to the spec (version
124
+ // / 5.2, section 11.7.1):
125
+ // /
126
+ // / "If the bind clause is not specified on a construct for which it may be
127
+ // / specified and the construct is closely nested inside a teams or parallel
128
+ // / construct, the effect is as if binding is teams or parallel. If none of
129
+ // / those conditions hold, the binding region is not defined."
130
+ // /
131
+ // / which means that standalone `loop` directives have undefined binding
132
+ // / region. Moreover, the spec says (in the next paragraph):
133
+ // /
134
+ // / "The specified binding region determines the binding thread set.
135
+ // / Specifically, if the binding region is a teams region, then the binding
136
+ // / thread set is the set of initial threads that are executing that region
137
+ // / while if the binding region is a parallel region, then the binding thread
138
+ // / set is the team of threads that are executing that region. If the binding
139
+ // / region is not defined, then the binding thread set is the encountering
140
+ // / thread."
141
+ // /
142
+ // / which means that the binding thread set for a standalone `loop` directive
143
+ // / is only the encountering thread.
144
+ // /
145
+ // / Since the encountering thread is the binding thread (set) for a
146
+ // / standalone `loop` directive, the best we can do in such case is to "simd"
147
+ // / the directive.
148
+ void rewriteToSimdLoop (mlir::omp::LoopOp loopOp,
149
+ mlir::ConversionPatternRewriter &rewriter) const {
150
+ loopOp.emitWarning (" Detected standalone OpenMP `loop` directive, the "
151
+ " associated loop will be rewritten to `simd`." );
152
+ mlir::omp::SimdOperands simdClauseOps;
153
+ simdClauseOps.privateVars = loopOp.getPrivateVars ();
154
+
155
+ auto privateSyms = loopOp.getPrivateSyms ();
156
+ if (privateSyms)
157
+ simdClauseOps.privateSyms .assign (privateSyms->begin (),
158
+ privateSyms->end ());
159
+
160
+ Fortran::common::openmp::EntryBlockArgs simdArgs;
161
+ simdArgs.priv .vars = simdClauseOps.privateVars ;
162
+
163
+ auto simdOp =
164
+ rewriter.create <mlir::omp::SimdOp>(loopOp.getLoc (), simdClauseOps);
165
+ mlir::Block *simdBlock =
166
+ genEntryBlock (rewriter, simdArgs, simdOp.getRegion ());
167
+
168
+ mlir::IRMapping mapper;
169
+ mlir::Block &loopBlock = *loopOp.getRegion ().begin ();
170
+
171
+ for (auto [loopOpArg, simdopArg] :
172
+ llvm::zip_equal (loopBlock.getArguments (), simdBlock->getArguments ()))
173
+ mapper.map (loopOpArg, simdopArg);
174
+
175
+ rewriter.clone (*loopOp.begin (), mapper);
176
+ }
177
+
103
178
void rewriteToDistributeParallelDo (
104
179
mlir::omp::LoopOp loopOp,
105
180
mlir::ConversionPatternRewriter &rewriter) const {
0 commit comments