Skip to content

Commit ef2d116

Browse files
tblahjoaosaffran
authored and
joaosaffran
committed
[mlir][OpenMP] initialize (first)private variables before task exec (llvm#125304)
This still doesn't fix the memory safety issues because the stack allocations created here for the private variables might go out of scope. I will add a more complete lit test later in this patch series.
1 parent 4818ffd commit ef2d116

File tree

2 files changed

+116
-54
lines changed

2 files changed

+116
-54
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 73 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,8 +1352,7 @@ findAssociatedValue(Value privateVar, llvm::IRBuilderBase &builder,
13521352
static llvm::Error initPrivateVar(
13531353
llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
13541354
omp::PrivateClauseOp &privDecl, Value mlirPrivVar, BlockArgument &blockArg,
1355-
llvm::SmallVectorImpl<llvm::Value *>::iterator llvmPrivateVarIt,
1356-
llvm::BasicBlock *privInitBlock,
1355+
llvm::Value **llvmPrivateVarIt, llvm::BasicBlock *privInitBlock,
13571356
llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
13581357
Region &initRegion = privDecl.getInitRegion();
13591358
if (initRegion.empty()) {
@@ -1783,31 +1782,82 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
17831782
for (mlir::Value privateVar : taskOp.getPrivateVars())
17841783
mlirPrivateVars.push_back(privateVar);
17851784

1786-
auto bodyCB = [&](InsertPointTy allocaIP,
1787-
InsertPointTy codegenIP) -> llvm::Error {
1788-
// Save the alloca insertion point on ModuleTranslation stack for use in
1789-
// nested regions.
1790-
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1791-
moduleTranslation, allocaIP);
1785+
// Allocate and copy private variables before creating the task. This avoids
1786+
// accessing invalid memory if (after this scope ends) the private variables
1787+
// are initialized from host variables or if the variables are copied into
1788+
// from host variables (firstprivate). The insertion point is just before
1789+
// where the code for creating and scheduling the task will go. That puts this
1790+
// code outside of the outlined task region, which is what we want because
1791+
// this way the initialization and copy regions are executed immediately while
1792+
// the host variable data are still live.
17921793

1793-
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
1794-
builder, moduleTranslation, privateBlockArgs, privateDecls,
1795-
mlirPrivateVars, llvmPrivateVars, allocaIP);
1796-
if (handleError(afterAllocas, *taskOp).failed())
1797-
return llvm::make_error<PreviouslyReportedError>();
1794+
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1795+
findAllocaInsertPoint(builder, moduleTranslation);
17981796

1799-
builder.restoreIP(codegenIP);
1800-
if (handleError(initPrivateVars(builder, moduleTranslation,
1801-
privateBlockArgs, privateDecls,
1802-
mlirPrivateVars, llvmPrivateVars),
1803-
*taskOp)
1804-
.failed())
1805-
return llvm::make_error<PreviouslyReportedError>();
1797+
// Not using splitBB() because that requires the current block to have a
1798+
// terminator.
1799+
assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
1800+
llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create(
1801+
builder.getContext(), "omp.task.start",
1802+
/*Parent=*/builder.GetInsertBlock()->getParent());
1803+
llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock);
1804+
builder.SetInsertPoint(branchToTaskStartBlock);
18061805

1807-
if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1808-
llvmPrivateVars, privateDecls)))
1809-
return llvm::make_error<PreviouslyReportedError>();
1806+
// Now do this again to make the initialization and copy blocks
1807+
llvm::BasicBlock *copyBlock =
1808+
splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
1809+
llvm::BasicBlock *initBlock =
1810+
splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
1811+
1812+
// Now the control flow graph should look like
1813+
// starter_block:
1814+
// <---- where we started when convertOmpTaskOp was called
1815+
// br %omp.private.init
1816+
// omp.private.init:
1817+
// br %omp.private.copy
1818+
// omp.private.copy:
1819+
// br %omp.task.start
1820+
// omp.task.start:
1821+
// <---- where we want the insertion point to be when we call createTask()
1822+
1823+
// Save the alloca insertion point on ModuleTranslation stack for use in
1824+
// nested regions.
1825+
LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
1826+
moduleTranslation, allocaIP);
1827+
1828+
// Allocate and initialize private variables
1829+
// TODO: package private variables up in a structure
1830+
for (auto [privDecl, mlirPrivVar, blockArg] :
1831+
llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) {
1832+
llvm::Type *llvmAllocType =
1833+
moduleTranslation.convertType(privDecl.getType());
1834+
1835+
// Allocations:
1836+
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
1837+
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
1838+
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
1839+
1840+
builder.SetInsertPoint(initBlock->getTerminator());
1841+
auto err = initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
1842+
blockArg, &llvmPrivateVar, initBlock);
1843+
if (err)
1844+
return handleError(std::move(err), *taskOp.getOperation());
1845+
1846+
llvmPrivateVars.push_back(llvmPrivateVar);
1847+
}
1848+
1849+
// firstprivate copy region
1850+
builder.SetInsertPoint(copyBlock->getTerminator());
1851+
if (failed(copyFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars,
1852+
llvmPrivateVars, privateDecls)))
1853+
return llvm::failure();
1854+
1855+
// Set up for call to createTask()
1856+
builder.SetInsertPoint(taskStartBlock);
18101857

1858+
auto bodyCB = [&](InsertPointTy allocaIP,
1859+
InsertPointTy codegenIP) -> llvm::Error {
1860+
builder.restoreIP(codegenIP);
18111861
// translate the body of the task:
18121862
auto continuationBlockOrError = convertOmpOpRegions(
18131863
taskOp.getRegion(), "omp.task.region", builder, moduleTranslation);
@@ -1827,8 +1877,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
18271877
buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(),
18281878
moduleTranslation, dds);
18291879

1830-
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1831-
findAllocaInsertPoint(builder, moduleTranslation);
18321880
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
18331881
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
18341882
moduleTranslation.getOpenMPBuilder()->createTask(

mlir/test/Target/LLVMIR/openmp-llvm.mlir

Lines changed: 43 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2790,11 +2790,14 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) {
27902790
}
27912791

27922792
// CHECK-LABEL: @par_task_
2793+
// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8
27932794
// CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]])
27942795
// CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]])
2795-
// CHECK: define internal void @[[task_outlined_fn]]
2796-
// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8
2797-
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]])
2796+
// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]])
2797+
// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8
2798+
// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0
2799+
// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8
2800+
// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]])
27982801
// CHECK: define internal void @[[parallel_outlined_fn]]
27992802
// -----
28002803

@@ -2819,33 +2822,36 @@ llvm.func @task(%arg0 : !llvm.ptr) {
28192822
}
28202823
llvm.return
28212824
}
2825+
// CHECK-LABEL: @task
2826+
// CHECK-SAME: (ptr %[[ARG:.*]])
2827+
// CHECK: %[[STRUCT_ARG:.*]] = alloca { ptr }, align 8
2828+
// CHECK: %[[OMP_PRIVATE_ALLOC:.*]] = alloca i32, align 4
2829+
// ...
2830+
// CHECK: br label %omp.private.init
2831+
// CHECK: omp.private.init:
2832+
// CHECK: br label %omp.private.copy1
2833+
// CHECK: omp.private.copy1:
2834+
// CHECK: %[[LOADED:.*]] = load i32, ptr %[[ARG]], align 4
2835+
// CHECK: store i32 %[[LOADED]], ptr %[[OMP_PRIVATE_ALLOC]], align 4
2836+
// ...
2837+
// CHECK: br label %omp.task.start
2838+
// CHECK: omp.task.start:
2839+
// CHECK: br label %[[CODEREPL:.*]]
2840+
// CHECK: [[CODEREPL]]:
2841+
28222842
// CHECK-LABEL: @task..omp_par
2823-
// CHECK: task.alloca:
2824-
// CHECK: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_12:.*]], align 8
2825-
// CHECK: %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_11]], i32 0, i32 0
2843+
// CHECK: task.alloca:
2844+
// CHECK: %[[VAL_12:.*]] = load ptr, ptr %[[STRUCT_ARG:.*]], align 8
2845+
// CHECK: %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_12]], i32 0, i32 0
28262846
// CHECK: %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8
2827-
// CHECK: %[[VAL_15:.*]] = alloca i32, align 4
2828-
// CHECK: br label %omp.region.after_alloca
2829-
2830-
// CHECK: omp.region.after_alloca:
28312847
// CHECK: br label %task.body
2832-
2833-
// CHECK: task.body: ; preds = %omp.region.after_alloca
2834-
// CHECK: br label %omp.private.init
2835-
2836-
// CHECK: omp.private.init: ; preds = %task.body
2837-
// CHECK: br label %omp.private.copy
2838-
2839-
// CHECK: omp.private.copy: ; preds = %omp.private.init
2840-
// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_14]], align 4
2841-
// CHECK: store i32 %[[VAL_19]], ptr %[[VAL_15]], align 4
2848+
// CHECK: task.body: ; preds = %task.alloca
28422849
// CHECK: br label %omp.task.region
2843-
2844-
// CHECK: omp.task.region: ; preds = %omp.private.copy
2845-
// CHECK: call void @foo(ptr %[[VAL_15]])
2850+
// CHECK: omp.task.region: ; preds = %task.body
2851+
// CHECK: call void @foo(ptr %[[VAL_14]])
28462852
// CHECK: br label %omp.region.cont
28472853
// CHECK: omp.region.cont: ; preds = %omp.task.region
2848-
// CHECK: call void @destroy(ptr %[[VAL_15]])
2854+
// CHECK: call void @destroy(ptr %[[VAL_14]])
28492855
// CHECK: br label %task.exit.exitStub
28502856
// CHECK: task.exit.exitStub: ; preds = %omp.region.cont
28512857
// CHECK: ret void
@@ -2915,6 +2921,19 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
29152921
// CHECK: br label %[[omp_region_cont:[^,]+]]
29162922
// CHECK: [[omp_taskgroup_region]]:
29172923
// CHECK: %{{.+}} = alloca i8, align 1
2924+
// CHECK: br label %[[omp_private_init:[^,]+]]
2925+
// CHECK: [[omp_private_init]]:
2926+
// CHECK: br label %[[omp_private_copy:[^,]+]]
2927+
// CHECK: [[omp_private_copy]]:
2928+
// CHECK: br label %[[omp_task_start:[^,]+]]
2929+
2930+
// CHECK: [[omp_region_cont:[^,]+]]:
2931+
// CHECK: br label %[[taskgroup_exit:[^,]+]]
2932+
// CHECK: [[taskgroup_exit]]:
2933+
// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
2934+
// CHECK: ret void
2935+
2936+
// CHECK: [[omp_task_start]]:
29182937
// CHECK: br label %[[codeRepl:[^,]+]]
29192938
// CHECK: [[codeRepl]]:
29202939
// CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}})
@@ -2938,11 +2957,6 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) {
29382957
// CHECK: br label %[[task_exit3:[^,]+]]
29392958
// CHECK: [[task_exit3]]:
29402959
// CHECK: br label %[[omp_taskgroup_region1]]
2941-
// CHECK: [[omp_region_cont]]:
2942-
// CHECK: br label %[[taskgroup_exit:[^,]+]]
2943-
// CHECK: [[taskgroup_exit]]:
2944-
// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]])
2945-
// CHECK: ret void
29462960
// CHECK: }
29472961

29482962
// -----

0 commit comments

Comments
 (0)