@@ -1750,25 +1750,80 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1750
1750
for (mlir::Value privateVar : taskOp.getPrivateVars ())
1751
1751
mlirPrivateVars.push_back (privateVar);
1752
1752
1753
- auto bodyCB = [&](InsertPointTy allocaIP,
1754
- InsertPointTy codegenIP) -> llvm::Error {
1755
- // Save the alloca insertion point on ModuleTranslation stack for use in
1756
- // nested regions.
1757
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame (
1758
- moduleTranslation, allocaIP);
1753
+ // Allocate and copy private variables before creating the task. This avoids
1754
+ // accessing invalid memory if (after this scope ends) the private variables
1755
+ // are initialized from host variables or if data is copied into them
1756
+ // from host variables (firstprivate). The insertion point is just before
1757
+ // where the code for creating and scheduling the task will go. That puts this
1758
+ // code outside of the outlined task region, which is what we want because
1759
+ // this way the initialization and copy regions are executed immediately while
1760
+ // the host variable data are still live.
1759
1761
1760
- llvm::Expected<llvm::BasicBlock *> afterAllocas =
1761
- allocateAndInitPrivateVars (builder, moduleTranslation, privateBlockArgs,
1762
- privateDecls, mlirPrivateVars,
1763
- llvmPrivateVars, allocaIP);
1764
- if (handleError (afterAllocas, *taskOp).failed ())
1765
- return llvm::make_error<PreviouslyReportedError>();
1762
+ llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1763
+ findAllocaInsertPoint (builder, moduleTranslation);
1766
1764
1767
- if (failed (initFirstPrivateVars (builder, moduleTranslation, mlirPrivateVars,
1768
- llvmPrivateVars, privateDecls,
1769
- afterAllocas.get ())))
1770
- return llvm::make_error<PreviouslyReportedError>();
1765
+ // Not using splitBB() because that requires the current block to have a
1766
+ // terminator.
1767
+ assert (builder.GetInsertPoint () == builder.GetInsertBlock ()->end ());
1768
+ llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create (
1769
+ builder.getContext (), " omp.task.start" ,
1770
+ /* Parent=*/ builder.GetInsertBlock ()->getParent ());
1771
+ llvm::Instruction *branchToTaskStartBlock = builder.CreateBr (taskStartBlock);
1772
+ builder.SetInsertPoint (branchToTaskStartBlock);
1773
+
1774
+ // Now do this again to make the initialization and copy blocks
1775
+ llvm::BasicBlock *copyBlock =
1776
+ splitBB (builder, /* CreateBranch=*/ true , " omp.private.copy" );
1777
+ llvm::BasicBlock *initBlock =
1778
+ splitBB (builder, /* CreateBranch=*/ true , " omp.private.init" );
1779
+
1780
+ // Now the control flow graph should look like
1781
+ // starter_block:
1782
+ // <---- where we started when convertOmpTaskOp was called
1783
+ // br %omp.private.init
1784
+ // omp.private.init:
1785
+ // br %omp.private.copy
1786
+ // omp.private.copy:
1787
+ // br %omp.task.start
1788
+ // omp.task.start:
1789
+ // <---- where we want the insertion point to be when we call createTask()
1790
+
1791
+ // Save the alloca insertion point on ModuleTranslation stack for use in
1792
+ // nested regions.
1793
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame (
1794
+ moduleTranslation, allocaIP);
1795
+
1796
+ // Allocate and initialize private variables
1797
+ // TODO: package private variables up in a structure
1798
+ builder.SetInsertPoint (initBlock->getTerminator ());
1799
+ for (auto [privDecl, mlirPrivVar, blockArg] :
1800
+ llvm::zip_equal (privateDecls, mlirPrivateVars, privateBlockArgs)) {
1801
+ llvm::Type *llvmAllocType =
1802
+ moduleTranslation.convertType (privDecl.getType ());
1771
1803
1804
+ // Allocations:
1805
+ builder.SetInsertPoint (allocaIP.getBlock ()->getTerminator ());
1806
+ llvm::Value *llvmPrivateVar = builder.CreateAlloca (
1807
+ llvmAllocType, /* ArraySize=*/ nullptr , " omp.private.alloc" );
1808
+
1809
+ // builder.SetInsertPoint(initBlock->getTerminator());
1810
+ auto err =
1811
+ initPrivateVar (builder, moduleTranslation, privDecl, mlirPrivVar,
1812
+ blockArg, llvmPrivateVar, llvmPrivateVars, initBlock);
1813
+ if (err)
1814
+ return handleError (std::move (err), *taskOp.getOperation ());
1815
+ }
1816
+
1817
+ // firstprivate copy region
1818
+ if (failed (initFirstPrivateVars (builder, moduleTranslation, mlirPrivateVars,
1819
+ llvmPrivateVars, privateDecls, copyBlock)))
1820
+ return llvm::failure ();
1821
+
1822
+ // Set up for call to createTask()
1823
+ builder.SetInsertPoint (taskStartBlock);
1824
+
1825
+ auto bodyCB = [&](InsertPointTy allocaIP,
1826
+ InsertPointTy codegenIP) -> llvm::Error {
1772
1827
// translate the body of the task:
1773
1828
builder.restoreIP (codegenIP);
1774
1829
auto continuationBlockOrError = convertOmpOpRegions (
@@ -1789,8 +1844,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
1789
1844
buildDependData (taskOp.getDependKinds (), taskOp.getDependVars (),
1790
1845
moduleTranslation, dds);
1791
1846
1792
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
1793
- findAllocaInsertPoint (builder, moduleTranslation);
1794
1847
llvm::OpenMPIRBuilder::LocationDescription ompLoc (builder);
1795
1848
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
1796
1849
moduleTranslation.getOpenMPBuilder ()->createTask (
0 commit comments