[MLIR][OpenMP] Update omp.wsloop translation to LLVM IR (4/5) #89214

Merged
21 commits merged on Apr 24, 2024
Commits
c3962aa
[MLIR][OpenMP] Make omp.wsloop into a loop wrapper (1/5)
skatrak Apr 17, 2024
f9b14e3
[MLIR][OpenMP] Update op verifiers dependent on omp.wsloop (2/5)
skatrak Apr 17, 2024
fdee8cf
[MLIR][SCF] Update scf.parallel lowering to OpenMP (3/5)
skatrak Apr 18, 2024
25dc3a4
[MLIR][OpenMP] Update omp.wsloop translation to LLVM IR (4/5)
skatrak Apr 18, 2024
18c8bda
Address review comment, improve tests
skatrak Apr 19, 2024
f7254bf
Merge branch 'users/skatrak/spr/wsloop-wrapper-02-dependent-ops' into…
skatrak Apr 19, 2024
f8c0897
Address review comments
skatrak Apr 19, 2024
2d53695
Merge branch 'users/skatrak/spr/wsloop-wrapper-03-scf-parallel' into …
skatrak Apr 19, 2024
6401482
Address review comments
skatrak Apr 19, 2024
2682417
Merge branch 'users/skatrak/spr/wsloop-wrapper-01-mlir' into users/sk…
skatrak Apr 19, 2024
e2a8386
Merge branch 'users/skatrak/spr/wsloop-wrapper-02-dependent-ops' into…
skatrak Apr 19, 2024
996a7cd
Merge branch 'users/skatrak/spr/wsloop-wrapper-03-scf-parallel' into …
skatrak Apr 19, 2024
f833127
Merge branch 'main' into users/skatrak/spr/wsloop-wrapper-01-mlir
skatrak Apr 24, 2024
48e93f9
Improve documentation
skatrak Apr 24, 2024
fc21155
Merge branch 'users/skatrak/spr/wsloop-wrapper-01-mlir' into users/sk…
skatrak Apr 24, 2024
e9f960a
Merge branch 'users/skatrak/spr/wsloop-wrapper-02-dependent-ops' into…
skatrak Apr 24, 2024
077c430
Merge branch 'users/skatrak/spr/wsloop-wrapper-03-scf-parallel' into …
skatrak Apr 24, 2024
196837c
Fix unit tests
skatrak Apr 24, 2024
75df239
Merge branch 'users/skatrak/spr/wsloop-wrapper-01-mlir' into users/sk…
skatrak Apr 24, 2024
b720026
Merge branch 'users/skatrak/spr/wsloop-wrapper-02-dependent-ops' into…
skatrak Apr 24, 2024
00bb71a
Merge branch 'users/skatrak/spr/wsloop-wrapper-03-scf-parallel' into …
skatrak Apr 24, 2024
7 changes: 3 additions & 4 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -295,10 +295,9 @@ using TeamsClauseOps =
PrivateClauseOps, ReductionClauseOps, ThreadLimitClauseOps>;

using WsloopClauseOps =
detail::Clauses<AllocateClauseOps, CollapseClauseOps, LinearClauseOps,
LoopRelatedOps, NowaitClauseOps, OrderClauseOps,
OrderedClauseOps, PrivateClauseOps, ReductionClauseOps,
ScheduleClauseOps>;
detail::Clauses<AllocateClauseOps, LinearClauseOps, NowaitClauseOps,
OrderClauseOps, OrderedClauseOps, PrivateClauseOps,
ReductionClauseOps, ScheduleClauseOps>;

} // namespace omp
} // namespace mlir
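The loop-bound and collapse clause bundles dropped from `WsloopClauseOps` here are not lost; they now belong to the `omp.loop_nest` operation that the wrapper contains. A minimal sketch of the resulting IR shape, with illustrative SSA names and `nowait` chosen as an example of a clause that stays on the wrapper:

```
// Bounds, step and collapse information are now carried by omp.loop_nest;
// omp.wsloop keeps only worksharing-specific clauses such as nowait.
omp.wsloop nowait {
  // Two induction variables correspond to a collapsed 2-D loop.
  omp.loop_nest (%i, %j) : index = (%lb0, %lb1) to (%ub0, %ub1) step (%s0, %s1) {
    // ... loop body ...
    omp.yield
  }
  omp.terminator
}
```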
62 changes: 28 additions & 34 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -600,29 +600,30 @@ def LoopNestOp : OpenMP_Op<"loop_nest", [SameVariadicOperandSize,
//===----------------------------------------------------------------------===//

def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
AllTypesMatch<["lowerBound", "upperBound", "step"]>,
DeclareOpInterfaceMethods<LoopWrapperInterface>,
RecursiveMemoryEffects, ReductionClauseInterface]> {
RecursiveMemoryEffects, ReductionClauseInterface,
SingleBlockImplicitTerminator<"TerminatorOp">]> {
let summary = "worksharing-loop construct";
let description = [{
The worksharing-loop construct specifies that the iterations of the loop(s)
will be executed in parallel by threads in the current context. These
iterations are spread across threads that already exist in the enclosing
parallel region. The lower and upper bounds specify a half-open range: the
range includes the lower bound but does not include the upper bound. If the
`inclusive` attribute is specified then the upper bound is also included.
parallel region.

The body region can contain any number of blocks. The region is terminated
by "omp.yield" instruction without operands.
The body region can only contain a single block which must contain a single
operation and a terminator. The operation must be another compatible loop
wrapper or an `omp.loop_nest`.

```
omp.wsloop <clauses>
for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
omp.wsloop <clauses> {
omp.loop_nest (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
%a = load %arrA[%i1, %i2] : memref<?x?xf32>
%b = load %arrB[%i1, %i2] : memref<?x?xf32>
%sum = arith.addf %a, %b : f32
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
omp.terminator
}
```

@@ -665,10 +666,7 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
passed by reference.
}];

let arguments = (ins Variadic<IntLikeType>:$lowerBound,
Variadic<IntLikeType>:$upperBound,
Variadic<IntLikeType>:$step,
Variadic<AnyType>:$linear_vars,
let arguments = (ins Variadic<AnyType>:$linear_vars,
Variadic<I32>:$linear_step_vars,
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$reductions,
@@ -679,22 +677,16 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
UnitAttr:$nowait,
UnitAttr:$byref,
ConfinedAttr<OptionalAttr<I64Attr>, [IntMinValue<0>]>:$ordered_val,
OptionalAttr<OrderKindAttr>:$order_val,
UnitAttr:$inclusive);
OptionalAttr<OrderKindAttr>:$order_val);

let builders = [
OpBuilder<(ins "ValueRange":$lowerBound, "ValueRange":$upperBound,
"ValueRange":$step,
CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
OpBuilder<(ins CArg<"ArrayRef<NamedAttribute>", "{}">:$attributes)>,
OpBuilder<(ins CArg<"const WsloopClauseOps &">:$clauses)>
];

let regions = (region AnyRegion:$region);

let extraClassDeclaration = [{
/// Returns the number of loops in the worksharing-loop nest.
unsigned getNumLoops() { return getLowerBound().size(); }

/// Returns the number of reduction variables.
unsigned getNumReductionVars() { return getReductionVars().size(); }
}];
@@ -711,9 +703,8 @@ def WsloopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|`byref` $byref
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`
) custom<Wsloop>($region, $lowerBound, $upperBound, $step, type($step),
$reduction_vars, type($reduction_vars), $reductions,
$inclusive) attr-dict
) custom<Wsloop>($region, $reduction_vars, type($reduction_vars),
$reductions) attr-dict
}];
let hasVerifier = 1;
}
Expand All @@ -732,7 +723,7 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,
transformed into a SIMD loop (that is, multiple iterations of the loop can
be executed concurrently using SIMD instructions).

The body region can contain a single block which must contain a single
The body region can only contain a single block which must contain a single
operation and a terminator. The operation must be another compatible loop
wrapper or an `omp.loop_nest`.

@@ -766,6 +757,7 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
omp.terminator
}
```
}];
@@ -805,8 +797,8 @@ def SimdOp : OpenMP_Op<"simd", [AttrSizedOperandSegments,

def YieldOp : OpenMP_Op<"yield",
[Pure, ReturnLike, Terminator,
ParentOneOf<["LoopNestOp", "WsloopOp", "DeclareReductionOp",
"AtomicUpdateOp", "PrivateClauseOp"]>]> {
ParentOneOf<["AtomicUpdateOp", "DeclareReductionOp", "LoopNestOp",
"PrivateClauseOp"]>]> {
let summary = "loop yield and termination operation";
let description = [{
"omp.yield" yields SSA values from the OpenMP dialect op region and
@@ -846,7 +838,7 @@ def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments,
iterations are spread across threads that already exist in the enclosing
region.

The body region can contain a single block which must contain a single
The body region can only contain a single block which must contain a single
operation and a terminator. The operation must be another compatible loop
wrapper or an `omp.loop_nest`.

@@ -864,6 +856,7 @@ def DistributeOp : OpenMP_Op<"distribute", [AttrSizedOperandSegments,
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
omp.terminator
}
```
// TODO: private_var, firstprivate_var, lastprivate_var, collapse
@@ -1029,7 +1022,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
iterations are distributed across tasks generated by the construct and
scheduled to be executed.

The body region can contain a single block which must contain a single
The body region can only contain a single block which must contain a single
operation and a terminator. The operation must be another compatible loop
wrapper or an `omp.loop_nest`.

@@ -1042,6 +1035,7 @@ def TaskloopOp : OpenMP_Op<"taskloop", [AttrSizedOperandSegments,
store %sum, %arrC[%i1, %i2] : memref<?x?xf32>
omp.yield
}
omp.terminator
}
```

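As an illustration of the new `custom<Wsloop>` assembly, here is a sketch of how a worksharing loop with a reduction might print once the loop bounds live on `omp.loop_nest`. The reduction clause is handled by the custom parser/printer, so the exact spelling and the names below (`@add_f32`, `%sum`, `%prv`) are illustrative rather than copied from the patch's tests:

```
// Reduction variables are attached to the wrapper; the entry block argument
// %prv is the thread-private accumulator visible inside the loop nest.
omp.wsloop reduction(@add_f32 %sum -> %prv : !llvm.ptr) {
  omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step) {
    // ... combine partial results into %prv ...
    omp.yield
  }
  omp.terminator
}
```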
51 changes: 39 additions & 12 deletions mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp
@@ -461,18 +461,50 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
// Replace the loop.
{
OpBuilder::InsertionGuard allocaGuard(rewriter);
auto loop = rewriter.create<omp::WsloopOp>(
// Create worksharing loop wrapper.
auto wsloopOp = rewriter.create<omp::WsloopOp>(parallelOp.getLoc());
if (!reductionVariables.empty()) {
wsloopOp.setReductionsAttr(
ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
wsloopOp.getReductionVarsMutable().append(reductionVariables);
}
rewriter.create<omp::TerminatorOp>(loc); // omp.parallel terminator.

// The wrapper's entry block arguments will define the reduction
// variables.
llvm::SmallVector<mlir::Type> reductionTypes;
reductionTypes.reserve(reductionVariables.size());
llvm::transform(reductionVariables, std::back_inserter(reductionTypes),
[](mlir::Value v) { return v.getType(); });
rewriter.createBlock(
&wsloopOp.getRegion(), {}, reductionTypes,
llvm::SmallVector<mlir::Location>(reductionVariables.size(),
parallelOp.getLoc()));

rewriter.setInsertionPoint(
rewriter.create<omp::TerminatorOp>(parallelOp.getLoc()));

// Create loop nest and populate region with contents of scf.parallel.
auto loopOp = rewriter.create<omp::LoopNestOp>(
parallelOp.getLoc(), parallelOp.getLowerBound(),
parallelOp.getUpperBound(), parallelOp.getStep());
rewriter.create<omp::TerminatorOp>(loc);

rewriter.inlineRegionBefore(parallelOp.getRegion(), loop.getRegion(),
loop.getRegion().begin());
rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(),
loopOp.getRegion().begin());

Block *ops = rewriter.splitBlock(&*loop.getRegion().begin(),
loop.getRegion().begin()->begin());
// Remove reduction-related block arguments from omp.loop_nest and
// redirect uses to the corresponding omp.wsloop block argument.
mlir::Block &loopOpEntryBlock = loopOp.getRegion().front();
unsigned numLoops = parallelOp.getNumLoops();
rewriter.replaceAllUsesWith(
loopOpEntryBlock.getArguments().drop_front(numLoops),
wsloopOp.getRegion().getArguments());
loopOpEntryBlock.eraseArguments(
numLoops, loopOpEntryBlock.getNumArguments() - numLoops);

rewriter.setInsertionPointToStart(&*loop.getRegion().begin());
Block *ops =
rewriter.splitBlock(&loopOpEntryBlock, loopOpEntryBlock.begin());
rewriter.setInsertionPointToStart(&loopOpEntryBlock);

auto scope = rewriter.create<memref::AllocaScopeOp>(parallelOp.getLoc(),
TypeRange());
@@ -481,11 +513,6 @@ struct ParallelOpLowering : public OpRewritePattern<scf::ParallelOp> {
rewriter.mergeBlocks(ops, scopeBlock);
rewriter.setInsertionPointToEnd(&*scope.getBodyRegion().begin());
rewriter.create<memref::AllocaScopeReturnOp>(loc, ValueRange());
if (!reductionVariables.empty()) {
loop.setReductionsAttr(
ArrayAttr::get(rewriter.getContext(), reductionDeclSymbols));
loop.getReductionVarsMutable().append(reductionVariables);
}
}
}
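
For reference, a rough sketch of the IR structure this lowering now produces for an `scf.parallel` with one reduction; the symbol and value names are illustrative, the original loop body is elided, and the reduction block argument defined by the `omp.wsloop` wrapper replaces the one previously carried by the loop itself:

```
omp.parallel {
  omp.wsloop reduction(@add_f32 %buf -> %red : !llvm.ptr) {
    omp.loop_nest (%i) : index = (%lb) to (%ub) step (%step) {
      memref.alloca_scope {
        // ... original scf.parallel body, with reduction uses remapped to %red ...
      }
      omp.yield
    }
    omp.terminator
  }
  omp.terminator
}
```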
