@@ -639,27 +639,42 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
639
639
// Build the argument list.
640
640
bool NeedWrapperFunction =
641
641
getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
642
- FunctionArgList Args;
643
- llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
644
- llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
642
+ FunctionArgList Args, WrapperArgs;
643
+ llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs,
644
+ WrapperLocalAddrs;
645
+ llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes,
646
+ WrapperVLASizes;
645
647
SmallString<256> Buffer;
646
648
llvm::raw_svector_ostream Out(Buffer);
647
649
Out << CapturedStmtInfo->getHelperName();
648
- if (NeedWrapperFunction)
650
+
651
+ CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
652
+ llvm::Function *WrapperF = nullptr;
653
+ if (NeedWrapperFunction) {
654
+ // Emit the final kernel early to allow attributes to be added by the
655
+ // OpenMP-IR-Builder.
656
+ FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
657
+ /*RegisterCastedArgsOnly=*/true,
658
+ CapturedStmtInfo->getHelperName(), Loc);
659
+ WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
660
+ WrapperF =
661
+ emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
662
+ WrapperCGF.CXXThisValue, WrapperFO);
649
663
Out << "_debug__";
664
+ }
650
665
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
651
666
Out.str(), Loc);
652
- llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
653
- VLASizes , CXXThisValue, FO);
667
+ llvm::Function *F = emitOutlinedFunctionPrologue(
668
+ *this, WrapperArgs, WrapperLocalAddrs, WrapperVLASizes , CXXThisValue, FO);
654
669
CodeGenFunction::OMPPrivateScope LocalScope(*this);
655
- for (const auto &LocalAddrPair : LocalAddrs ) {
670
+ for (const auto &LocalAddrPair : WrapperLocalAddrs ) {
656
671
if (LocalAddrPair.second.first) {
657
672
LocalScope.addPrivate(LocalAddrPair.second.first,
658
673
LocalAddrPair.second.second);
659
674
}
660
675
}
661
676
(void)LocalScope.Privatize();
662
- for (const auto &VLASizePair : VLASizes )
677
+ for (const auto &VLASizePair : WrapperVLASizes )
663
678
VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
664
679
PGO.assignRegionCounters(GlobalDecl(CD), F);
665
680
CapturedStmtInfo->EmitBody(*this, CD->getBody());
@@ -668,17 +683,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
668
683
if (!NeedWrapperFunction)
669
684
return F;
670
685
671
- FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
672
- /*RegisterCastedArgsOnly=*/true,
673
- CapturedStmtInfo->getHelperName(), Loc);
674
- CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
675
- WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
676
- Args.clear();
677
- LocalAddrs.clear();
678
- VLASizes.clear();
679
- llvm::Function *WrapperF =
680
- emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
681
- WrapperCGF.CXXThisValue, WrapperFO);
686
+ // Reverse the order: re-insert the wrapper right after F in F's parent.
687
+ WrapperF->removeFromParent();
688
+ F->getParent()->getFunctionList().insertAfter(F->getIterator(), WrapperF);
689
+
682
690
llvm::SmallVector<llvm::Value *, 4> CallArgs;
683
691
auto *PI = F->arg_begin();
684
692
for (const auto *Arg : Args) {
0 commit comments