-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[OpenMP 6.0] Codegen for reduction over private variables with reduction clause #134709
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 27 commits
a05af19
4e6eea6
18e1708
59ab4be
e45c30a
980bc06
a103dfa
526314c
c77fb0e
f202eaa
9d2370b
0ca2f86
9335af1
e1a1998
efd69bb
c01671e
ad0d2f0
4df2910
2468be3
9576c87
7e324bd
262a861
a0d29ab
0c2978c
384cd4a
76db75a
0b59740
694e241
4c36ba7
b146a1a
3bb17c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -4898,6 +4898,274 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, | |||||
} | ||||||
} | ||||||
|
||||||
void CGOpenMPRuntime::emitPrivateReduction( | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||
CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, | ||||||
const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) { | ||||||
|
||||||
// Create a shared global variable (__shared_reduction_var) to accumulate the | ||||||
// final result. | ||||||
// | ||||||
// Call __kmpc_barrier to synchronize threads before initialization. | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
// | ||||||
// The master thread (thread_id == 0) initializes __shared_reduction_var | ||||||
// with the identity value or initializer. | ||||||
// | ||||||
// Call __kmpc_barrier to synchronize before combining. | ||||||
// For each i: | ||||||
// - Thread enters critical section. | ||||||
// - Reads its private value from LHSExprs[i]. | ||||||
// - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i], | ||||||
// LHSExprs[i]). | ||||||
// - Exits critical section. | ||||||
// | ||||||
// Call __kmpc_barrier after combining. | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
// | ||||||
// Each thread copies __shared_reduction_var[i] back to LHSExprs[i]. | ||||||
// | ||||||
// Final __kmpc_barrier to synchronize after broadcasting | ||||||
QualType PrivateType = Privates->getType(); | ||||||
llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType); | ||||||
|
||||||
llvm::Constant *InitVal = nullptr; | ||||||
const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps); | ||||||
// Determine the initial value for the shared reduction variable | ||||||
if (!UDR) { | ||||||
InitVal = llvm::Constant::getNullValue(LLVMType); | ||||||
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) { | ||||||
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { | ||||||
const Expr *InitExpr = VD->getInit(); | ||||||
if (InitExpr) { | ||||||
Expr::EvalResult Result; | ||||||
if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) { | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
APValue &InitValue = Result.Val; | ||||||
if (InitValue.isInt()) | ||||||
InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt()); | ||||||
else if (InitValue.isFloat()) | ||||||
InitVal = llvm::ConstantFP::get(LLVMType, InitValue.getFloat()); | ||||||
else if (InitValue.isComplexInt()) { | ||||||
// For complex int: create struct { real, imag } | ||||||
llvm::Constant *Real = llvm::ConstantInt::get( | ||||||
cast<llvm::StructType>(LLVMType)->getElementType(0), | ||||||
InitValue.getComplexIntReal()); | ||||||
llvm::Constant *Imag = llvm::ConstantInt::get( | ||||||
cast<llvm::StructType>(LLVMType)->getElementType(1), | ||||||
InitValue.getComplexIntImag()); | ||||||
InitVal = llvm::ConstantStruct::get( | ||||||
cast<llvm::StructType>(LLVMType), {Real, Imag}); | ||||||
} else if (InitValue.isComplexFloat()) { | ||||||
llvm::Constant *Real = llvm::ConstantFP::get( | ||||||
cast<llvm::StructType>(LLVMType)->getElementType(0), | ||||||
InitValue.getComplexFloatReal()); | ||||||
llvm::Constant *Imag = llvm::ConstantFP::get( | ||||||
cast<llvm::StructType>(LLVMType)->getElementType(1), | ||||||
InitValue.getComplexFloatImag()); | ||||||
InitVal = llvm::ConstantStruct::get( | ||||||
cast<llvm::StructType>(LLVMType), {Real, Imag}); | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
} | ||||||
} else { | ||||||
InitVal = llvm::Constant::getNullValue(LLVMType); | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
std::string ReductionVarNameStr; | ||||||
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts())) | ||||||
ReductionVarNameStr = DRE->getDecl()->getNameAsString(); | ||||||
else | ||||||
ReductionVarNameStr = "unnamed_priv_var"; | ||||||
|
||||||
// Create an internal shared variable | ||||||
std::string SharedName = | ||||||
CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr}); | ||||||
llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable( | ||||||
CGM.getModule(), LLVMType, false, llvm::GlobalValue::InternalLinkage, | ||||||
InitVal, ".omp.reduction." + SharedName, nullptr, | ||||||
llvm::GlobalVariable::NotThreadLocal); | ||||||
|
||||||
SharedVar->setAlignment( | ||||||
llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8)); | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
Address SharedResult(SharedVar, SharedVar->getValueType(), | ||||||
CGF.getContext().getTypeAlignInChars(PrivateType)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use CGF.MakeNaturalAlignRawAddrLValue There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done !! |
||||||
|
||||||
llvm::Value *ThreadId = getThreadID(CGF, Loc); | ||||||
llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); | ||||||
llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId}; | ||||||
|
||||||
llvm::BasicBlock *InitBB = CGF.createBasicBlock("init"); | ||||||
llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end"); | ||||||
|
||||||
llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ( | ||||||
ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0)); | ||||||
CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB); | ||||||
|
||||||
CGF.EmitBlock(InitBB); | ||||||
|
||||||
auto EmitSharedInit = [&]() { | ||||||
if (UDR) { // Check if it's a User-Defined Reduction | ||||||
if (const Expr *UDRInitExpr = UDR->getInitializer()) { | ||||||
std::pair<llvm::Function *, llvm::Function *> FnPair = | ||||||
getUserDefinedReduction(UDR); | ||||||
llvm::Function *InitializerFn = FnPair.second; | ||||||
if (InitializerFn) { | ||||||
if (const auto *CE = | ||||||
dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) { | ||||||
const auto *OutDRE = cast<DeclRefExpr>( | ||||||
cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts()) | ||||||
->getSubExpr()); | ||||||
const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl()); | ||||||
|
||||||
CodeGenFunction::OMPPrivateScope LocalScope(CGF); | ||||||
LocalScope.addPrivate(OutVD, SharedResult); | ||||||
|
||||||
(void)LocalScope.Privatize(); | ||||||
if (const auto *OVE = dyn_cast<OpaqueValueExpr>( | ||||||
CE->getCallee()->IgnoreParenImpCasts())) { | ||||||
CodeGenFunction::OpaqueValueMapping OpaqueMap( | ||||||
CGF, OVE, RValue::get(InitializerFn)); | ||||||
CGF.EmitIgnoredExpr(CE); | ||||||
} else { | ||||||
CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult, | ||||||
PrivateType.getQualifiers(), true); | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
} else { | ||||||
CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult, | ||||||
PrivateType.getQualifiers(), true); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done !! |
||||||
} | ||||||
} else { | ||||||
CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult, | ||||||
PrivateType.getQualifiers(), true); | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
} else { | ||||||
// EmitNullInitialization handles default construction for C++ classes | ||||||
// and zeroing for scalars, which is a reasonable default. | ||||||
CGF.EmitNullInitialization(SharedResult, PrivateType); | ||||||
} | ||||||
return; // UDR initialization handled | ||||||
} | ||||||
if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) { | ||||||
if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) { | ||||||
if (const Expr *InitExpr = VD->getInit()) { | ||||||
CGF.EmitAnyExprToMem(InitExpr, SharedResult, | ||||||
PrivateType.getQualifiers(), true); | ||||||
return; | ||||||
} | ||||||
} | ||||||
} | ||||||
CGF.EmitNullInitialization(SharedResult, PrivateType); | ||||||
}; | ||||||
EmitSharedInit(); | ||||||
CGF.Builder.CreateBr(InitEndBB); | ||||||
CGF.EmitBlock(InitEndBB); | ||||||
|
||||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||||||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||||||
BarrierArgs); | ||||||
|
||||||
const Expr *ReductionOp = ReductionOps; | ||||||
const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp); | ||||||
LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType); | ||||||
LValue LHSLV = CGF.EmitLValue(LHSExprs); | ||||||
|
||||||
auto EmitCriticalReduction = [&](auto ReductionGen) { | ||||||
std::string CriticalName = getName({"reduction_critical"}); | ||||||
emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc); | ||||||
}; | ||||||
|
||||||
if (CurrentUDR) { | ||||||
// Handle user-defined reduction. | ||||||
auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { | ||||||
Action.Enter(CGF); | ||||||
std::pair<llvm::Function *, llvm::Function *> FnPair = | ||||||
getUserDefinedReduction(CurrentUDR); | ||||||
if (FnPair.first) { | ||||||
if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) { | ||||||
const auto *OutDRE = cast<DeclRefExpr>( | ||||||
cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts()) | ||||||
->getSubExpr()); | ||||||
const auto *InDRE = cast<DeclRefExpr>( | ||||||
cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts()) | ||||||
->getSubExpr()); | ||||||
CodeGenFunction::OMPPrivateScope LocalScope(CGF); | ||||||
LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()), | ||||||
SharedLV.getAddress()); | ||||||
LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()), | ||||||
LHSLV.getAddress()); | ||||||
(void)LocalScope.Privatize(); | ||||||
emitReductionCombiner(CGF, ReductionOp); | ||||||
} | ||||||
} | ||||||
}; | ||||||
EmitCriticalReduction(ReductionGen); | ||||||
} else { | ||||||
// Handle built-in reduction operations. | ||||||
const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts(); | ||||||
if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr)) | ||||||
ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts(); | ||||||
|
||||||
const Expr *AssignRHS = nullptr; | ||||||
if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) { | ||||||
if (BinOp->getOpcode() == BO_Assign) | ||||||
AssignRHS = BinOp->getRHS(); | ||||||
} else if (const auto *OpCall = | ||||||
dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) { | ||||||
if (OpCall->getOperator() == OO_Equal) | ||||||
AssignRHS = OpCall->getArg(1); | ||||||
} | ||||||
|
||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
if (!AssignRHS) | ||||||
return; | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { | ||||||
Action.Enter(CGF); | ||||||
const auto *OmpOutDRE = | ||||||
dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts()); | ||||||
const auto *OmpInDRE = | ||||||
dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts()); | ||||||
if (!OmpOutDRE || !OmpInDRE) | ||||||
return; | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl()); | ||||||
const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl()); | ||||||
CodeGenFunction::OMPPrivateScope LocalScope(CGF); | ||||||
LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress()); | ||||||
LocalScope.addPrivate(OmpInVD, LHSLV.getAddress()); | ||||||
(void)LocalScope.Privatize(); | ||||||
// Emit the actual reduction operation | ||||||
CGF.EmitIgnoredExpr(ReductionOp); | ||||||
}; | ||||||
EmitCriticalReduction(ReductionGen); | ||||||
} | ||||||
|
||||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||||||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||||||
BarrierArgs); | ||||||
|
||||||
// Broadcast final result | ||||||
bool IsAggregate = PrivateType->isAggregateType(); | ||||||
LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType); | ||||||
llvm::Value *FinalResultVal = nullptr; | ||||||
Address FinalResultAddr = Address::invalid(); | ||||||
|
||||||
if (IsAggregate) | ||||||
FinalResultAddr = SharedResult; | ||||||
else | ||||||
FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc); | ||||||
|
||||||
LValue TargetLHSLV = CGF.EmitLValue(LHSExprs); | ||||||
if (IsAggregate) { | ||||||
CGF.EmitAggregateCopy(TargetLHSLV, | ||||||
CGF.MakeAddrLValue(FinalResultAddr, PrivateType), | ||||||
PrivateType, AggValueSlot::DoesNotOverlap, false); | ||||||
} else { | ||||||
CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV); | ||||||
} | ||||||
// Final synchronization barrier | ||||||
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( | ||||||
CGM.getModule(), OMPRTL___kmpc_barrier), | ||||||
BarrierArgs); | ||||||
} | ||||||
|
||||||
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, | ||||||
ArrayRef<const Expr *> Privates, | ||||||
ArrayRef<const Expr *> LHSExprs, | ||||||
|
@@ -5153,7 +5421,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, | |||||
} else { | ||||||
// Emit as a critical region. | ||||||
auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, | ||||||
const Expr *, const Expr *) { | ||||||
const Expr *, const Expr *) { | ||||||
CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); | ||||||
std::string Name = RT.getName({"atomic_reduction"}); | ||||||
RT.emitCriticalRegion( | ||||||
|
@@ -5200,6 +5468,26 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, | |||||
|
||||||
CGF.EmitBranch(DefaultBB); | ||||||
CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); | ||||||
if (Options.IsPrivateVarReduction) { | ||||||
if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty()) | ||||||
return; | ||||||
if (LHSExprs.size() != Privates.size() || | ||||||
LHSExprs.size() != ReductionOps.size()) | ||||||
return; | ||||||
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
chandraghale marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
assert(!LHSExprs.empty() && "PrivateVarReduction: LHSExprs is empty"); | ||||||
assert(!Privates.empty() && "PrivateVarReduction: Privates is empty"); | ||||||
assert(!ReductionOps.empty() && | ||||||
"PrivateVarReduction: ReductionOps is empty"); | ||||||
assert(LHSExprs.size() == Privates.size() && | ||||||
"PrivateVarReduction: Privates size mismatch"); | ||||||
assert(LHSExprs.size() == ReductionOps.size() && | ||||||
"PrivateVarReduction: ReductionOps size mismatch"); | ||||||
for (unsigned I : | ||||||
llvm::seq<unsigned>(std::min(ReductionOps.size(), LHSExprs.size()))) { | ||||||
emitPrivateReduction(CGF, Loc, Privates[I], LHSExprs[I], RHSExprs[I], | ||||||
ReductionOps[I]); | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
/// Generates unique name for artificial threadprivate variables. | ||||||
|
Uh oh!
There was an error while loading. Please reload this page.