Skip to content

WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 115 additions & 3 deletions flang/lib/Lower/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2075,6 +2075,7 @@ createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter,
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());

mlir::Type tempTy = converter.genType(*sym);
llvm::outs() << "Temp type = " << tempTy << "\n";
mlir::Value temp = firOpBuilder.create<fir::AllocaOp>(
loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{},
/*shapeParams*/ mlir::ValueRange{},
Expand All @@ -2088,6 +2089,103 @@ createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter,
return storeOp;
}

/// Create the body (block) for an OpenMP Loop Operation.
///
/// \param [in] op - the operation the body belongs to.
/// \param [inout] converter - converter to use for the clauses.
/// \param [in] loc - location in source code.
/// \param [in] eval - current PFT node/evaluation.
/// \oaran [in] clauses - list of clauses to process.
/// \param [in] args - block arguments (induction variable[s]) for the
//// region.
/// \param [in] outerCombined - is this an outer operation - prevents
/// privatization.
template <typename Op>
static void createBodyOfLoopOp(
Op &op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc,
Fortran::lower::pft::Evaluation &eval,
const Fortran::parser::OmpClauseList *clauses = nullptr,
const llvm::SmallVector<const Fortran::semantics::Symbol *> &args = {},
bool outerCombined = false, DataSharingProcessor *dsp = nullptr) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// If an argument for the region is provided then create the block with that
// argument. Also update the symbol's address with the mlir argument value.
// e.g. For loops the argument is the induction variable. And all further
// uses of the induction variable should use this mlir value.
mlir::Operation *storeOp = nullptr;
assert(args.size() > 0);
std::size_t loopVarTypeSize = 0;
for (const Fortran::semantics::Symbol *arg : args)
loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size());
mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
llvm::SmallVector<mlir::Type> tiv;
llvm::SmallVector<mlir::Location> locs;
for (int i = 0; i < (int)args.size(); i++) {
tiv.push_back(loopVarType);
locs.push_back(loc);
}
int offset = 0;
// The argument is not currently in memory, so make a temporary for the
// argument, and store it there, then bind that location to the argument.
for (const Fortran::semantics::Symbol *arg : args) {
mlir::Type symType = converter.genType(*arg);
mlir::Type symRefType = firOpBuilder.getRefType(symType);
tiv.push_back(symRefType);
locs.push_back(loc);
offset++;
}
firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs);

int argIndex = 0;
for (const Fortran::semantics::Symbol *arg : args) {
mlir::Value addrVal =
fir::getBase(op.getRegion().front().getArgument(argIndex+offset));
converter.bindSymbol(*arg, addrVal);
mlir::Type symType = converter.genType(*arg);
mlir::Value indexVal =
fir::getBase(op.getRegion().front().getArgument(argIndex));
mlir::Value cvtVal = firOpBuilder.createConvert(loc, symType, indexVal);
addrVal = converter.getSymbolAddress(*arg);
storeOp = firOpBuilder.create<fir::StoreOp>(loc, cvtVal, addrVal);
argIndex++;
}
// Set the insert for the terminator operation to go at the end of the
// block - this is either empty or the block with the stores above,
// the end of the block works for both.
mlir::Block &block = op.getRegion().back();
firOpBuilder.setInsertionPointToEnd(&block);

// If it is an unstructured region and is not the outer region of a combined
// construct, create empty blocks for all evaluations.
if (eval.lowerAsUnstructured() && !outerCombined)
Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp,
mlir::omp::YieldOp>(
firOpBuilder, eval.getNestedEvaluations());

// Insert the terminator.
Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc);
// Reset the insert point to before the terminator.
resetBeforeTerminator(firOpBuilder, storeOp, block);

// Handle privatization. Do not privatize if this is the outer operation.
if (clauses && !outerCombined) {
constexpr bool isLoop = std::is_same_v<Op, mlir::omp::WsLoopOp> ||
std::is_same_v<Op, mlir::omp::SimdLoopOp>;
if (!dsp) {
DataSharingProcessor proc(converter, *clauses, eval);
proc.processStep1();
proc.processStep2(op, isLoop);
} else {
if (isLoop && args.size() > 0)
dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
dsp->processStep2(op, isLoop);
}

if (storeOp)
firOpBuilder.setInsertionPointAfter(storeOp);
}
}

/// Create the body (block) for an OpenMP Operation.
///
/// \param [in] op - the operation the body belongs to.
Expand Down Expand Up @@ -2914,7 +3012,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
const Fortran::parser::OpenMPLoopConstruct &loopConstruct) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
llvm::SmallVector<mlir::Value> lowerBound, upperBound, step, linearVars,
linearStepVars, reductionVars;
linearStepVars, privateVars, reductionVars;
mlir::Value scheduleChunkClauseOperand;
mlir::IntegerAttr orderedClauseOperand;
mlir::omp::ClauseOrderKindAttr orderClauseOperand;
Expand Down Expand Up @@ -3023,9 +3121,23 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
return;
}

// Collect the loops to collapse.
Fortran::lower::pft::Evaluation *doConstructEval =
&eval.getFirstNestedEvaluation();
Fortran::lower::pft::Evaluation *doLoop =
&doConstructEval->getFirstNestedEvaluation();
auto *doStmt = doLoop->getIf<Fortran::parser::NonLabelDoStmt>();
assert(doStmt && "Expected do loop to be in the nested evaluation");
const auto &loopControl =
std::get<std::optional<Fortran::parser::LoopControl>>(doStmt->t);
const Fortran::parser::LoopControl::Bounds *bounds =
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
assert(bounds && "Expected bounds for worksharing do loop");
privateVars.push_back(converter.getSymbolAddress(*bounds->name.thing.symbol));

auto wsLoopOp = firOpBuilder.create<mlir::omp::WsLoopOp>(
currentLocation, lowerBound, upperBound, step, linearVars, linearStepVars,
reductionVars,
privateVars, reductionVars,
reductionDeclSymbols.empty()
? nullptr
: mlir::ArrayAttr::get(firOpBuilder.getContext(),
Expand Down Expand Up @@ -3061,7 +3173,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
wsLoopOp.setNowaitAttr(nowaitClauseOperand);
}

createBodyOfOp<mlir::omp::WsLoopOp>(wsLoopOp, converter, currentLocation,
createBodyOfLoopOp<mlir::omp::WsLoopOp>(wsLoopOp, converter, currentLocation,
eval, &loopOpClauseList, iv,
/*outer=*/false, &dsp);
}
Expand Down
2 changes: 2 additions & 0 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
Variadic<IntLikeType>:$step,
Variadic<AnyType>:$linear_vars,
Variadic<I32>:$linear_step_vars,
Variadic<OpenMP_PointerLikeType>:$privates,
Variadic<OpenMP_PointerLikeType>:$reduction_vars,
OptionalAttr<SymbolRefArrayAttr>:$reductions,
OptionalAttr<ScheduleKindAttr>:$schedule_val,
Expand Down Expand Up @@ -517,6 +518,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments,
|`nowait` $nowait
|`ordered` `(` $ordered_val `)`
|`order` `(` custom<ClauseAttr>($order_val) `)`
|`private` `(` custom<PrivateEntries>($privates, type($privates)) `)`
|`reduction` `(`
custom<ReductionVarList>(
$reduction_vars, type($reduction_vars), $reductions
Expand Down
73 changes: 71 additions & 2 deletions mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,67 @@ void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) {
p << stringifyEnum(attr.getValue());
}

static ParseResult
parsePrivateEntries(OpAsmParser &parser,
SmallVectorImpl<OpAsmParser::UnresolvedOperand> &privateOperands,
SmallVectorImpl<Type> &privateOperandTypes) {
OpAsmParser::UnresolvedOperand arg;
OpAsmParser::UnresolvedOperand blockArg;
Type argType;
auto parseEntries = [&]() -> ParseResult {
if (parser.parseOperand(arg) || parser.parseArrow() ||
parser.parseOperand(blockArg))
return failure();
privateOperands.push_back(arg);
return success();
};

auto parseTypes = [&]() -> ParseResult {
if (parser.parseType(argType))
return failure();
privateOperandTypes.push_back(argType);
return success();
};

if (parser.parseCommaSeparatedList(parseEntries))
return failure();

if (parser.parseColon())
return failure();

if (parser.parseCommaSeparatedList(parseTypes))
return failure();

return success();
}

static void printPrivateEntries(OpAsmPrinter &p, Operation *op,
OperandRange privateOperands,
TypeRange privateOperandTypes) {
auto &region = op->getRegion(0);

unsigned argIndex = 0;
unsigned offset = 0;
if (auto wsLoop = dyn_cast<WsLoopOp>(op))
offset = wsLoop.getNumLoops();
for (const auto &privOperand : privateOperands) {
const auto &blockArg = region.front().getArgument(argIndex+offset);
p << privOperand << " -> " << blockArg;
argIndex++;
if (argIndex < privateOperands.size())
p << ", ";
}
p << " : ";

argIndex = 0;
for (const auto &privOperandType : privateOperandTypes) {
p << privOperandType;
argIndex++;
if (argIndex < privateOperands.size())
p << ", ";
}
}

//===----------------------------------------------------------------------===//
// Parser and printer for Linear Clause
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1086,7 +1147,14 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region &region,
ValueRange steps, TypeRange loopVarTypes,
UnitAttr inclusive) {
auto args = region.front().getArguments();
p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound
p << " (";
unsigned numLoops = steps.size();
for (unsigned i=0; i<numLoops; i++) {
if (i != 0)
p << ", ";
p << args[i];
}
p << ") : " << args[0].getType() << " = (" << lowerBound
<< ") to (" << upperBound << ") ";
if (inclusive)
p << "inclusive ";
Expand Down Expand Up @@ -1269,7 +1337,8 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
ValueRange step, ArrayRef<NamedAttribute> attributes) {
build(builder, state, lowerBound, upperBound, step,
/*linear_vars=*/ValueRange(),
/*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(),
/*linear_step_vars=*/ValueRange(), /*private_vars=*/ValueRange(),
/*reduction_vars=*/ValueRange(),
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
/*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,24 @@ static void collectReductionInfo(
}
}

/// Allocate space for privatized reduction variables.
void
allocPrivatizationVars(omp::WsLoopOp loop, llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
unsigned offset = loop.getNumLoops();
unsigned numArgs = loop.getRegion().front().getNumArguments();
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.restoreIP(allocaIP);
for (unsigned i = offset; i < numArgs; ++i) {
if (auto op = loop.getPrivates()[i-offset].getDefiningOp<LLVM::AllocaOp>()) {
llvm::Value *var = builder.CreateAlloca(moduleTranslation.convertType(op.getResultPtrElementType()));
// moduleTranslation.convertType(loop.getPrivates()[i-offset].getType()));
moduleTranslation.mapValue(loop.getRegion().front().getArgument(i), var);
}
}
}

/// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder.
static LogicalResult
convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
Expand Down Expand Up @@ -861,6 +879,8 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

allocPrivatizationVars(loop, builder, moduleTranslation, allocaIP);

SmallVector<llvm::Value *> privateReductionVariables;
DenseMap<Value, llvm::Value *> reductionVariableMap;
allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls,
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4:
// CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> ()
"test.payload"(%arg6, %arg7) : (index, index) -> ()
omp.yield
}) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
}) {operandSegmentSizes = array<i32: 2, 2, 2, 0, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
omp.terminator
}
return
Expand Down
10 changes: 5 additions & 5 deletions mlir/test/Dialect/OpenMP/ops.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -141,39 +141,39 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre
"omp.wsloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0>, ordered_val = 1} :
}) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0,0>, ordered_val = 1} :
(index, index, index) -> ()

// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({
^bb0(%iv: index):
omp.yield
}) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,0>, schedule_val = #omp<schedulekind static>} :
}) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,0,0>, schedule_val = #omp<schedulekind static>} :
(index, index, index, memref<i32>, i32) -> ()

// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>, %{{.*}} = %{{.*}} : memref<i32>) schedule(static)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({
^bb0(%iv: index):
omp.yield
}) {operandSegmentSizes = array<i32: 1,1,1,2,2,0,0>, schedule_val = #omp<schedulekind static>} :
}) {operandSegmentSizes = array<i32: 1,1,1,2,2,0,0,0>, schedule_val = #omp<schedulekind static>} :
(index, index, index, memref<i32>, memref<i32>, i32, i32) -> ()

// CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref<i32>) schedule(dynamic = %{{.*}}) ordered(2)
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({
^bb0(%iv: index):
omp.yield
}) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,1>, schedule_val = #omp<schedulekind dynamic>, ordered_val = 2} :
}) {operandSegmentSizes = array<i32: 1,1,1,1,1,0,0,1>, schedule_val = #omp<schedulekind dynamic>, ordered_val = 2} :
(index, index, index, memref<i32>, i32, i32) -> ()

// CHECK: omp.wsloop schedule(auto) nowait
// CHECK-SAME: for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}})
"omp.wsloop" (%lb, %ub, %step) ({
^bb0(%iv: index):
omp.yield
}) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0>, nowait, schedule_val = #omp<schedulekind auto>} :
}) {operandSegmentSizes = array<i32: 1,1,1,0,0,0,0,0>, nowait, schedule_val = #omp<schedulekind auto>} :
(index, index, index) -> ()

return
Expand Down
6 changes: 3 additions & 3 deletions mlir/test/Target/LLVMIR/openmp-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) {
llvm.store %3, %4 : f32, !llvm.ptr
omp.yield
// CHECK: call void @__kmpc_for_static_fini(ptr @[[$loc_struct]],
}) {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
}) {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
omp.terminator
}
llvm.return
Expand All @@ -330,7 +330,7 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) {
%4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
llvm.store %3, %4 : f32, !llvm.ptr
omp.yield
}) {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
}) {operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
llvm.return
}

Expand All @@ -348,7 +348,7 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) {
%4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32
llvm.store %3, %4 : f32, !llvm.ptr
omp.yield
}) {inclusive, operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
}) {inclusive, operandSegmentSizes = array<i32: 1, 1, 1, 0, 0, 0, 0, 0>} : (i64, i64, i64) -> ()
llvm.return
}

Expand Down