@@ -161,6 +161,11 @@ class DataSharingProcessor {
161
161
const Fortran::parser::OmpClauseList &opClauseList;
162
162
Fortran::lower::pft::Evaluation &eval;
163
163
164
+ bool useDelayedPrivatizationWhenPossible;
165
+ Fortran::lower::SymMap *symTable;
166
+ llvm::SetVector<mlir::SymbolRefAttr> privateInitializers;
167
+ llvm::SetVector<mlir::Value> privateSymHostAddrsses;
168
+
164
169
bool needBarrier ();
165
170
void collectSymbols (Fortran::semantics::Symbol::Flag flag);
166
171
void collectOmpObjectListSymbol (
@@ -182,10 +187,14 @@ class DataSharingProcessor {
182
187
public:
183
188
DataSharingProcessor (Fortran::lower::AbstractConverter &converter,
184
189
const Fortran::parser::OmpClauseList &opClauseList,
185
- Fortran::lower::pft::Evaluation &eval)
190
+ Fortran::lower::pft::Evaluation &eval,
191
+ bool useDelayedPrivatizationWhenPossible = false ,
192
+ Fortran::lower::SymMap *symTable = nullptr )
186
193
: hasLastPrivateOp(false ), converter(converter),
187
194
firOpBuilder (converter.getFirOpBuilder()), opClauseList(opClauseList),
188
- eval(eval) {}
195
+ eval(eval), useDelayedPrivatizationWhenPossible(
196
+ useDelayedPrivatizationWhenPossible),
197
+ symTable(symTable) {}
189
198
// Privatisation is split into two steps.
190
199
// Step1 performs cloning of all privatisation clauses and copying for
191
200
// firstprivates. Step1 is performed at the place where process/processStep1
@@ -204,6 +213,14 @@ class DataSharingProcessor {
204
213
assert (!loopIV && " Loop iteration variable already set" );
205
214
loopIV = iv;
206
215
}
216
+
217
+ const llvm::SetVector<mlir::SymbolRefAttr> &getPrivateInitializers () const {
218
+ return privateInitializers;
219
+ };
220
+
221
+ const llvm::SetVector<mlir::Value> &getPrivateSymHostAddrsses () const {
222
+ return privateSymHostAddrsses;
223
+ }
207
224
};
208
225
209
226
void DataSharingProcessor::processStep1 () {
@@ -496,8 +513,46 @@ void DataSharingProcessor::privatize() {
496
513
copyFirstPrivateSymbol (&*mem);
497
514
}
498
515
} else {
499
- cloneSymbol (sym);
500
- copyFirstPrivateSymbol (sym);
516
+ if (useDelayedPrivatizationWhenPossible) {
517
+ auto ip = firOpBuilder.saveInsertionPoint ();
518
+
519
+ auto moduleOp = firOpBuilder.getInsertionBlock ()
520
+ ->getParentOp ()
521
+ ->getParentOfType <mlir::ModuleOp>();
522
+
523
+ firOpBuilder.setInsertionPoint (&moduleOp.getBodyRegion ().front (),
524
+ moduleOp.getBodyRegion ().front ().end ());
525
+
526
+ Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol (*sym);
527
+ assert (hsb && " Host symbol box not found" );
528
+
529
+ auto symType = hsb.getAddr ().getType ();
530
+ auto symLoc = hsb.getAddr ().getLoc ();
531
+ auto privatizerOp = firOpBuilder.create <mlir::omp::PrivateClauseOp>(
532
+ symLoc, symType, sym->name ().ToString ());
533
+ firOpBuilder.setInsertionPointToEnd (&privatizerOp.getBody ().front ());
534
+
535
+ symTable->pushScope ();
536
+ symTable->addSymbol (*sym, privatizerOp.getArgument (0 ));
537
+ symTable->pushScope ();
538
+
539
+ cloneSymbol (sym);
540
+ copyFirstPrivateSymbol (sym);
541
+
542
+ firOpBuilder.create <mlir::omp::YieldOp>(
543
+ hsb.getAddr ().getLoc (),
544
+ symTable->shallowLookupSymbol (*sym).getAddr ());
545
+
546
+ symTable->popScope ();
547
+ symTable->popScope ();
548
+ firOpBuilder.restoreInsertionPoint (ip);
549
+
550
+ privateInitializers.insert (mlir::SymbolRefAttr::get (privatizerOp));
551
+ privateSymHostAddrsses.insert (hsb.getAddr ());
552
+ } else {
553
+ cloneSymbol (sym);
554
+ copyFirstPrivateSymbol (sym);
555
+ }
501
556
}
502
557
}
503
558
}
@@ -2463,12 +2518,12 @@ static OpTy genOpWithBody(Fortran::lower::AbstractConverter &converter,
2463
2518
Fortran::lower::pft::Evaluation &eval, bool genNested,
2464
2519
mlir::Location currentLocation, bool outerCombined,
2465
2520
const Fortran::parser::OmpClauseList *clauseList,
2466
- Args &&...args) {
2521
+ DataSharingProcessor *dsp, Args &&...args) {
2467
2522
auto op = converter.getFirOpBuilder ().create <OpTy>(
2468
2523
currentLocation, std::forward<Args>(args)...);
2469
2524
createBodyOfOp<OpTy>(op, converter, currentLocation, eval, genNested,
2470
2525
clauseList,
2471
- /* args=*/ {}, outerCombined);
2526
+ /* args=*/ {}, outerCombined, dsp );
2472
2527
return op;
2473
2528
}
2474
2529
@@ -2480,21 +2535,25 @@ genMasterOp(Fortran::lower::AbstractConverter &converter,
2480
2535
currentLocation,
2481
2536
/* outerCombined=*/ false ,
2482
2537
/* clauseList=*/ nullptr ,
2538
+ /* dsp=*/ nullptr ,
2483
2539
/* resultTypes=*/ mlir::TypeRange ());
2484
2540
}
2485
2541
2486
2542
static mlir::omp::OrderedRegionOp
2487
2543
genOrderedRegionOp (Fortran::lower::AbstractConverter &converter,
2488
2544
Fortran::lower::pft::Evaluation &eval, bool genNested,
2489
2545
mlir::Location currentLocation) {
2490
- return genOpWithBody<mlir::omp::OrderedRegionOp>(
2491
- converter, eval, genNested, currentLocation,
2492
- /* outerCombined=*/ false ,
2493
- /* clauseList=*/ nullptr , /* simd=*/ false );
2546
+ return genOpWithBody<mlir::omp::OrderedRegionOp>(converter, eval, genNested,
2547
+ currentLocation,
2548
+ /* outerCombined=*/ false ,
2549
+ /* clauseList=*/ nullptr ,
2550
+ /* dsp=*/ nullptr ,
2551
+ /* simd=*/ false );
2494
2552
}
2495
2553
2496
2554
static mlir::omp::ParallelOp
2497
2555
genParallelOp (Fortran::lower::AbstractConverter &converter,
2556
+ Fortran::lower::SymMap &symTable,
2498
2557
Fortran::lower::pft::Evaluation &eval, bool genNested,
2499
2558
mlir::Location currentLocation,
2500
2559
const Fortran::parser::OmpClauseList &clauseList,
@@ -2516,16 +2575,37 @@ genParallelOp(Fortran::lower::AbstractConverter &converter,
2516
2575
if (!outerCombined)
2517
2576
cp.processReduction (currentLocation, reductionVars, reductionDeclSymbols);
2518
2577
2578
+ bool privatize = !outerCombined;
2579
+ DataSharingProcessor dsp (converter, clauseList, eval,
2580
+ /* useDelayedPrivatizationWhenPossible=*/ true ,
2581
+ &symTable);
2582
+
2583
+ if (privatize) {
2584
+ dsp.processStep1 ();
2585
+ }
2586
+
2587
+ llvm::SmallVector<mlir::Attribute> privateInits (
2588
+ dsp.getPrivateInitializers ().begin (), dsp.getPrivateInitializers ().end ());
2589
+
2590
+ llvm::SmallVector<mlir::Value> privateSymAddresses (
2591
+ dsp.getPrivateSymHostAddrsses ().begin (),
2592
+ dsp.getPrivateSymHostAddrsses ().end ());
2593
+
2519
2594
return genOpWithBody<mlir::omp::ParallelOp>(
2520
2595
converter, eval, genNested, currentLocation, outerCombined, &clauseList,
2596
+ &dsp,
2521
2597
/* resultTypes=*/ mlir::TypeRange (), ifClauseOperand,
2522
2598
numThreadsClauseOperand, allocateOperands, allocatorOperands,
2523
- reductionVars,
2599
+ reductionVars, privateSymAddresses,
2524
2600
reductionDeclSymbols.empty ()
2525
2601
? nullptr
2526
2602
: mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2527
2603
reductionDeclSymbols),
2528
- procBindKindAttr);
2604
+ procBindKindAttr,
2605
+ privateInits.empty ()
2606
+ ? nullptr
2607
+ : mlir::ArrayAttr::get (converter.getFirOpBuilder ().getContext (),
2608
+ privateInits));
2529
2609
}
2530
2610
2531
2611
static mlir::omp::SectionOp
@@ -2537,7 +2617,8 @@ genSectionOp(Fortran::lower::AbstractConverter &converter,
2537
2617
// all privatization is done within `omp.section` operations.
2538
2618
return genOpWithBody<mlir::omp::SectionOp>(
2539
2619
converter, eval, genNested, currentLocation,
2540
- /* outerCombined=*/ false , &sectionsClauseList);
2620
+ /* outerCombined=*/ false , &sectionsClauseList,
2621
+ /* dsp=*/ nullptr );
2541
2622
}
2542
2623
2543
2624
static mlir::omp::SingleOp
@@ -2558,8 +2639,8 @@ genSingleOp(Fortran::lower::AbstractConverter &converter,
2558
2639
2559
2640
return genOpWithBody<mlir::omp::SingleOp>(
2560
2641
converter, eval, genNested, currentLocation,
2561
- /* outerCombined=*/ false , &beginClauseList, allocateOperands ,
2562
- allocatorOperands, nowaitAttr);
2642
+ /* outerCombined=*/ false , &beginClauseList, /* dsp= */ nullptr ,
2643
+ allocateOperands, allocatorOperands, nowaitAttr);
2563
2644
}
2564
2645
2565
2646
static mlir::omp::TaskOp
@@ -2591,8 +2672,8 @@ genTaskOp(Fortran::lower::AbstractConverter &converter,
2591
2672
2592
2673
return genOpWithBody<mlir::omp::TaskOp>(
2593
2674
converter, eval, genNested, currentLocation,
2594
- /* outerCombined=*/ false , &clauseList, ifClauseOperand, finalClauseOperand ,
2595
- untiedAttr, mergeableAttr,
2675
+ /* outerCombined=*/ false , &clauseList, /* dsp= */ nullptr , ifClauseOperand ,
2676
+ finalClauseOperand, untiedAttr, mergeableAttr,
2596
2677
/* in_reduction_vars=*/ mlir::ValueRange (),
2597
2678
/* in_reductions=*/ nullptr , priorityClauseOperand,
2598
2679
dependTypeOperands.empty ()
@@ -2615,6 +2696,7 @@ genTaskGroupOp(Fortran::lower::AbstractConverter &converter,
2615
2696
return genOpWithBody<mlir::omp::TaskGroupOp>(
2616
2697
converter, eval, genNested, currentLocation,
2617
2698
/* outerCombined=*/ false , &clauseList,
2699
+ /* dsp=*/ nullptr ,
2618
2700
/* task_reduction_vars=*/ mlir::ValueRange (),
2619
2701
/* task_reductions=*/ nullptr , allocateOperands, allocatorOperands);
2620
2702
}
@@ -2994,6 +3076,7 @@ genTeamsOp(Fortran::lower::AbstractConverter &converter,
2994
3076
2995
3077
return genOpWithBody<mlir::omp::TeamsOp>(
2996
3078
converter, eval, genNested, currentLocation, outerCombined, &clauseList,
3079
+ /* dsp=*/ nullptr ,
2997
3080
/* num_teams_lower=*/ nullptr , numTeamsClauseOperand, ifClauseOperand,
2998
3081
threadLimitClauseOperand, allocateOperands, allocatorOperands,
2999
3082
reductionVars,
@@ -3392,8 +3475,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter,
3392
3475
if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet)
3393
3476
.test (ompDirective)) {
3394
3477
validDirective = true ;
3395
- genParallelOp (converter, eval, /* genNested=*/ false , currentLocation ,
3396
- loopOpClauseList,
3478
+ genParallelOp (converter, symTable, eval, /* genNested=*/ false ,
3479
+ currentLocation, loopOpClauseList,
3397
3480
/* outerCombined=*/ true );
3398
3481
}
3399
3482
}
@@ -3481,8 +3564,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3481
3564
genOrderedRegionOp (converter, eval, /* genNested=*/ true , currentLocation);
3482
3565
break ;
3483
3566
case llvm::omp::Directive::OMPD_parallel:
3484
- genParallelOp (converter, eval, /* genNested=*/ true , currentLocation ,
3485
- beginClauseList);
3567
+ genParallelOp (converter, symTable, eval, /* genNested=*/ true ,
3568
+ currentLocation, beginClauseList);
3486
3569
break ;
3487
3570
case llvm::omp::Directive::OMPD_single:
3488
3571
genSingleOp (converter, eval, /* genNested=*/ true , currentLocation,
@@ -3541,8 +3624,8 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3541
3624
.test (directive.v )) {
3542
3625
bool outerCombined =
3543
3626
directive.v != llvm::omp::Directive::OMPD_target_parallel;
3544
- genParallelOp (converter, eval, /* genNested=*/ false , currentLocation ,
3545
- beginClauseList, outerCombined);
3627
+ genParallelOp (converter, symTable, eval, /* genNested=*/ false ,
3628
+ currentLocation, beginClauseList, outerCombined);
3546
3629
combinedDirective = true ;
3547
3630
}
3548
3631
if ((llvm::omp::workShareSet & llvm::omp::blockConstructSet)
@@ -3625,7 +3708,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3625
3708
3626
3709
// Parallel wrapper of PARALLEL SECTIONS construct
3627
3710
if (dir == llvm::omp::Directive::OMPD_parallel_sections) {
3628
- genParallelOp (converter, eval,
3711
+ genParallelOp (converter, symTable, eval,
3629
3712
/* genNested=*/ false , currentLocation, sectionsClauseList,
3630
3713
/* outerCombined=*/ true );
3631
3714
} else {
@@ -3642,6 +3725,7 @@ genOMP(Fortran::lower::AbstractConverter &converter,
3642
3725
/* genNested=*/ false , currentLocation,
3643
3726
/* outerCombined=*/ false ,
3644
3727
/* clauseList=*/ nullptr ,
3728
+ /* dsp=*/ nullptr ,
3645
3729
/* reduction_vars=*/ mlir::ValueRange (),
3646
3730
/* reductions=*/ nullptr , allocateOperands,
3647
3731
allocatorOperands, nowaitClauseOperand);
0 commit comments