-
Notifications
You must be signed in to change notification settings - Fork 13.5k
WIP : [Flang][MLIR][OpenMP] Delayed privatisation of index variables #75836
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
f6c8748
to
2d6e1c4
Compare
You can test this locally with the following command:git-clang-format --diff decf0277a7f2c69684a44e80f7791038cfaed82d 2d6e1c49fa9687c2ac649aa3416ce04e9f6fb7ba -- flang/lib/Lower/OpenMP.cpp mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp View the diff from clang-format here.diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
index c5c9859dfd..57983d60e5 100644
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -2139,11 +2139,11 @@ static void createBodyOfLoopOp(
int argIndex = 0;
for (const Fortran::semantics::Symbol *arg : args) {
mlir::Value addrVal =
- fir::getBase(op.getRegion().front().getArgument(argIndex+offset));
+ fir::getBase(op.getRegion().front().getArgument(argIndex + offset));
converter.bindSymbol(*arg, addrVal);
mlir::Type symType = converter.genType(*arg);
mlir::Value indexVal =
- fir::getBase(op.getRegion().front().getArgument(argIndex));
+ fir::getBase(op.getRegion().front().getArgument(argIndex));
mlir::Value cvtVal = firOpBuilder.createConvert(loc, symType, indexVal);
addrVal = converter.getSymbolAddress(*arg);
storeOp = firOpBuilder.create<fir::StoreOp>(loc, cvtVal, addrVal);
@@ -3121,7 +3121,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
return;
}
- // Collect the loops to collapse.
+ // Collect the loops to collapse.
Fortran::lower::pft::Evaluation *doConstructEval =
&eval.getFirstNestedEvaluation();
Fortran::lower::pft::Evaluation *doLoop =
@@ -3174,8 +3174,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
}
createBodyOfLoopOp<mlir::omp::WsLoopOp>(wsLoopOp, converter, currentLocation,
- eval, &loopOpClauseList, iv,
- /*outer=*/false, &dsp);
+ eval, &loopOpClauseList, iv,
+ /*outer=*/false, &dsp);
}
static void
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3da37231e3..18dbbd572e 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -178,10 +178,10 @@ void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) {
p << stringifyEnum(attr.getValue());
}
-static ParseResult
-parsePrivateEntries(OpAsmParser &parser,
- SmallVectorImpl<OpAsmParser::UnresolvedOperand> &privateOperands,
- SmallVectorImpl<Type> &privateOperandTypes) {
+static ParseResult parsePrivateEntries(
+ OpAsmParser &parser,
+ SmallVectorImpl<OpAsmParser::UnresolvedOperand> &privateOperands,
+ SmallVectorImpl<Type> &privateOperandTypes) {
OpAsmParser::UnresolvedOperand arg;
OpAsmParser::UnresolvedOperand blockArg;
Type argType;
@@ -213,8 +213,8 @@ parsePrivateEntries(OpAsmParser &parser,
}
static void printPrivateEntries(OpAsmPrinter &p, Operation *op,
- OperandRange privateOperands,
- TypeRange privateOperandTypes) {
+ OperandRange privateOperands,
+ TypeRange privateOperandTypes) {
auto ®ion = op->getRegion(0);
unsigned argIndex = 0;
@@ -222,7 +222,7 @@ static void printPrivateEntries(OpAsmPrinter &p, Operation *op,
if (auto wsLoop = dyn_cast<WsLoopOp>(op))
offset = wsLoop.getNumLoops();
for (const auto &privOperand : privateOperands) {
- const auto &blockArg = region.front().getArgument(argIndex+offset);
+ const auto &blockArg = region.front().getArgument(argIndex + offset);
p << privOperand << " -> " << blockArg;
argIndex++;
if (argIndex < privateOperands.size())
@@ -1149,13 +1149,13 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region ®ion,
auto args = region.front().getArguments();
p << " (";
unsigned numLoops = steps.size();
- for (unsigned i=0; i<numLoops; i++) {
+ for (unsigned i = 0; i < numLoops; i++) {
if (i != 0)
p << ", ";
p << args[i];
}
- p << ") : " << args[0].getType() << " = (" << lowerBound
- << ") to (" << upperBound << ") ";
+ p << ") : " << args[0].getType() << " = (" << lowerBound << ") to ("
+ << upperBound << ") ";
if (inclusive)
p << "inclusive ";
p << "step (" << steps << ") ";
@@ -1338,7 +1338,7 @@ void WsLoopOp::build(OpBuilder &builder, OperationState &state,
build(builder, state, lowerBound, upperBound, step,
/*linear_vars=*/ValueRange(),
/*linear_step_vars=*/ValueRange(), /*private_vars=*/ValueRange(),
- /*reduction_vars=*/ValueRange(),
+ /*reduction_vars=*/ValueRange(),
/*reductions=*/nullptr, /*schedule_val=*/nullptr,
/*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr,
/*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 26ec12e427..3c3a5278df 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -834,17 +834,18 @@ static void collectReductionInfo(
}
/// Allocate space for privatized reduction variables.
-void
-allocPrivatizationVars(omp::WsLoopOp loop, llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation,
- llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
+void allocPrivatizationVars(omp::WsLoopOp loop, llvm::IRBuilderBase &builder,
+ LLVM::ModuleTranslation &moduleTranslation,
+ llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) {
unsigned offset = loop.getNumLoops();
unsigned numArgs = loop.getRegion().front().getNumArguments();
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.restoreIP(allocaIP);
for (unsigned i = offset; i < numArgs; ++i) {
- if (auto op = loop.getPrivates()[i-offset].getDefiningOp<LLVM::AllocaOp>()) {
- llvm::Value *var = builder.CreateAlloca(moduleTranslation.convertType(op.getResultPtrElementType()));
+ if (auto op =
+ loop.getPrivates()[i - offset].getDefiningOp<LLVM::AllocaOp>()) {
+ llvm::Value *var = builder.CreateAlloca(
+ moduleTranslation.convertType(op.getResultPtrElementType()));
// moduleTranslation.convertType(loop.getPrivates()[i-offset].getType()));
moduleTranslation.mapValue(loop.getRegion().front().getArgument(i), var);
}
|
Nice to see some work towards more information carried out in the dialect! |
This is an alternative to #74843. But looks like this patch alone is not sufficient. But we can change the codegen of worksharing loop to emit the alloca in the parallel body function. Alternatively, since the alloca for the index variable is being materialized late, we can add the lifetime start and end at the point. @skatrak |
I think this approach makes sense. If I understand it correctly, this would add an explicit dialect representation for the private clause of the OpenMP for/do loop and use it internally to privatize the loop index, which allows us only materializing the copy later during MLIR to LLVM IR translation. In that case, I think the best option is to create the alloca inside of the outlined function for the parallel region and then add lifetime markers similarly to what I proposed in #74843, but more simply since we would already have access to the relevant allocas there, and there would be no need of analyzing the MLIR loop body. Ideally, if we were to outline the loop body into its own function, the best would be to create the alloca there and not have to deal with lifetime markers. But the process we have for this relies on using the code extractor on a loop that has already been created inside of the outlined function for the parallel section, so we either mark the allocation's lifetime so it can be sunk into the new outlined function or we generate it inside of the loop. I think the second case would result in a very slow execution or possibly even memory leaks when this second outlining is not done, so the first one seems like the safer option. Let me know if you'd like to explore that approach and I can update my patch / create another one to build on top of this. I guess the idea would be to add the private, firstprivate and lastprivate args to all applicable OpenMP MLIR ops, and potentially modify the |
Not for merge.
This patch is a WIP to delay privatisation of index variables.