Skip to content

Commit a57b8d3

Browse files
committed
[flang][OpenMP] Add support for target ... private
Adds support for the `private` clause in the `target` directive. In order to support that, the `DataSharingProcessor` was also restructured in order to separate the collection of private symbols from their actual privatization code-gen. The commit adds both a code-gen test and an offloading test.
1 parent fa6025e commit a57b8d3

File tree

3 files changed

+117
-23
lines changed

3 files changed

+117
-23
lines changed

flang/lib/Lower/OpenMP.cpp

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -143,15 +143,17 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter,
143143
//===----------------------------------------------------------------------===//
144144

145145
class DataSharingProcessor {
146+
using SymbolSet = llvm::SetVector<const Fortran::semantics::Symbol *>;
147+
146148
bool hasLastPrivateOp;
147149
mlir::OpBuilder::InsertPoint lastPrivIP;
148150
mlir::OpBuilder::InsertPoint insPt;
149151
mlir::Value loopIV;
150152
// Symbols in private, firstprivate, and/or lastprivate clauses.
151-
llvm::SetVector<const Fortran::semantics::Symbol *> privatizedSymbols;
152-
llvm::SetVector<const Fortran::semantics::Symbol *> defaultSymbols;
153-
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInNestedRegions;
154-
llvm::SetVector<const Fortran::semantics::Symbol *> symbolsInParentRegions;
153+
SymbolSet privatizedSymbols;
154+
SymbolSet defaultSymbols;
155+
SymbolSet symbolsInNestedRegions;
156+
SymbolSet symbolsInParentRegions;
155157
Fortran::lower::AbstractConverter &converter;
156158
fir::FirOpBuilder &firOpBuilder;
157159
const Fortran::parser::OmpClauseList &opClauseList;
@@ -182,35 +184,54 @@ class DataSharingProcessor {
182184
: hasLastPrivateOp(false), converter(converter),
183185
firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList),
184186
eval(eval) {}
185-
// Privatisation is split into two steps.
186-
// Step1 performs cloning of all privatisation clauses and copying for
187-
// firstprivates. Step1 is performed at the place where process/processStep1
187+
// Privatisation is split into 3 steps:
188+
//
189+
// * Step1: collects all symbols that should be privatized.
190+
//
191+
// * Step2: performs cloning of all privatisation clauses and copying for
192+
// firstprivates. Step2 is performed at the place where process/processStep2
188193
// is called. This is usually inside the Operation corresponding to the OpenMP
189-
// construct, for looping constructs this is just before the Operation. The
190-
// split into two steps was performed basically to be able to call
191-
// privatisation for looping constructs before the operation is created since
192-
// the bounds of the MLIR OpenMP operation can be privatised.
193-
// Step2 performs the copying for lastprivates and requires knowledge of the
194-
// MLIR operation to insert the last private update. Step2 adds
194+
// construct, for looping constructs this is just before the Operation.
195+
//
196+
// * Step3: performs the copying for lastprivates and requires knowledge of
197+
// the MLIR operation to insert the last private update. Step3 adds
195198
// deallocation code as well.
199+
//
200+
// The split was performed for the following reasons:
201+
//
202+
// 1. Step1 was split so that the `target` op knows which symbols should not
203+
// be mapped into the target region due to being `private`. The implicit
204+
// mapping happens before the op body is generated so we need to collect
205+
// the private symbols first and then later in the body actually privatize
206+
// them.
207+
//
208+
// 2. Step2 was split in order to call privatisation for looping constructs
209+
// before the operation is created since the bounds of the MLIR OpenMP
210+
// operation can be privatised.
196211
void processStep1();
197-
void processStep2(mlir::Operation *op, bool isLoop);
212+
void processStep2();
213+
void processStep3(mlir::Operation *op, bool isLoop);
198214

199215
void setLoopIV(mlir::Value iv) {
200216
assert(!loopIV && "Loop iteration variable already set");
201217
loopIV = iv;
202218
}
219+
220+
const SymbolSet &getPrivatizedSymbols() const { return privatizedSymbols; }
203221
};
204222

205223
void DataSharingProcessor::processStep1() {
206224
collectSymbolsForPrivatization();
207225
collectDefaultSymbols();
226+
}
227+
228+
void DataSharingProcessor::processStep2() {
208229
privatize();
209230
defaultPrivatize();
210231
insertBarrier();
211232
}
212233

213-
void DataSharingProcessor::processStep2(mlir::Operation *op, bool isLoop) {
234+
void DataSharingProcessor::processStep3(mlir::Operation *op, bool isLoop) {
214235
insPt = firOpBuilder.saveInsertionPoint();
215236
copyLastPrivatize(op);
216237
firOpBuilder.restoreInsertionPoint(insPt);
@@ -2306,11 +2327,12 @@ static void createBodyOfOp(
23062327
if (!dsp) {
23072328
DataSharingProcessor proc(converter, *clauses, eval);
23082329
proc.processStep1();
2309-
proc.processStep2(op, isLoop);
2330+
proc.processStep2();
2331+
proc.processStep3(op, isLoop);
23102332
} else {
23112333
if (isLoop && args.size() > 0)
23122334
dsp->setLoopIV(converter.getSymbolAddress(*args[0]));
2313-
dsp->processStep2(op, isLoop);
2335+
dsp->processStep3(op, isLoop);
23142336
}
23152337

23162338
if (storeOp)
@@ -2648,7 +2670,9 @@ static void genBodyOfTargetOp(
26482670
const llvm::SmallVector<mlir::Type> &mapSymTypes,
26492671
const llvm::SmallVector<mlir::Location> &mapSymLocs,
26502672
const llvm::SmallVector<const Fortran::semantics::Symbol *> &mapSymbols,
2651-
const mlir::Location &currentLocation) {
2673+
const mlir::Location &currentLocation,
2674+
const Fortran::parser::OmpClauseList &clauseList,
2675+
DataSharingProcessor &dsp) {
26522676
assert(mapSymTypes.size() == mapSymLocs.size());
26532677

26542678
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
@@ -2657,6 +2681,8 @@ static void genBodyOfTargetOp(
26572681
auto *regionBlock =
26582682
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
26592683

2684+
dsp.processStep2();
2685+
26602686
// Clones the `bounds` placing them inside the target region and returns them.
26612687
auto cloneBound = [&](mlir::Value bound) {
26622688
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
@@ -2811,8 +2837,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
28112837
cp.processNowait(nowaitAttr);
28122838
cp.processMap(currentLocation, directive, semanticsContext, stmtCtx,
28132839
mapOperands, &mapSymTypes, &mapSymLocs, &mapSymbols);
2814-
cp.processTODO<Fortran::parser::OmpClause::Private,
2815-
Fortran::parser::OmpClause::Depend,
2840+
cp.processTODO<Fortran::parser::OmpClause::Depend,
28162841
Fortran::parser::OmpClause::Firstprivate,
28172842
Fortran::parser::OmpClause::IsDevicePtr,
28182843
Fortran::parser::OmpClause::HasDeviceAddr,
@@ -2823,11 +2848,19 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
28232848
Fortran::parser::OmpClause::Defaultmap>(
28242849
currentLocation, llvm::omp::Directive::OMPD_target);
28252850

2851+
DataSharingProcessor dsp(converter, clauseList, eval);
2852+
dsp.processStep1();
2853+
28262854
// 5.8.1 Implicit Data-Mapping Attribute Rules
28272855
// The following code follows the implicit data-mapping rules to map all the
2828-
// symbols used inside the region that have not been explicitly mapped using
2829-
// the map clause.
2856+
// symbols used inside the region that do not have explicit data-environment
2857+
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
2858+
// clauses).
28302859
auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) {
2860+
if (dsp.getPrivatizedSymbols().contains(&sym)) {
2861+
return;
2862+
}
2863+
28312864
if (llvm::find(mapSymbols, &sym) == mapSymbols.end()) {
28322865
mlir::Value baseOp = converter.getSymbolAddress(sym);
28332866
if (!baseOp)
@@ -2893,7 +2926,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter,
28932926
nowaitAttr, mapOperands);
28942927

28952928
genBodyOfTargetOp(converter, eval, genNested, targetOp, mapSymTypes,
2896-
mapSymLocs, mapSymbols, currentLocation);
2929+
mapSymLocs, mapSymbols, currentLocation, clauseList, dsp);
28972930

28982931
return targetOp;
28992932
}
@@ -3127,6 +3160,7 @@ createSimdLoop(Fortran::lower::AbstractConverter &converter,
31273160
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
31283161
DataSharingProcessor dsp(converter, loopOpClauseList, eval);
31293162
dsp.processStep1();
3163+
dsp.processStep2();
31303164

31313165
Fortran::lower::StatementContext stmtCtx;
31323166
mlir::Value scheduleChunkClauseOperand, ifClauseOperand;
@@ -3179,6 +3213,7 @@ static void createWsLoop(Fortran::lower::AbstractConverter &converter,
31793213
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
31803214
DataSharingProcessor dsp(converter, beginClauseList, eval);
31813215
dsp.processStep1();
3216+
dsp.processStep2();
31823217

31833218
Fortran::lower::StatementContext stmtCtx;
31843219
mlir::Value scheduleChunkClauseOperand;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
!Test data-sharing attribute clauses for the `target` directive.
2+
3+
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
4+
5+
!CHECK-LABEL: func.func @_QPomp_target_private()
6+
subroutine omp_target_private
7+
implicit none
8+
integer :: x(1)
9+
10+
!$omp target private(x)
11+
x(1) = 42
12+
!$omp end target
13+
!CHECK: omp.target {
14+
!CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
15+
!CHECK-DAG: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "x",
16+
!CHECK-SAME: pinned, uniq_name = "_QFomp_target_privateEx"}
17+
!CHECK-NEXT: %[[SHAPE:.*]] = fir.shape %[[C1]] : (index) -> !fir.shape<1>
18+
!CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]](%[[SHAPE]])
19+
!CHECK-SAME: {uniq_name = "_QFomp_target_privateEx"} :
20+
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) ->
21+
!CHECK-SAME: (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
22+
!CHECK-DAG: %[[C42:.*]] = arith.constant 42 : i32
23+
!CHECK-DAG: %[[C1_2:.*]] = arith.constant 1 : index
24+
!CHECK-NEXT: %[[PRIV_BINDING:.*]] = hlfir.designate %[[PRIV_DECL]]#0 (%[[C1_2]])
25+
!CHECK-SAME: : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
26+
!CHECK-NEXT: hlfir.assign %[[C42]] to %[[PRIV_BINDING]] : i32, !fir.ref<i32>
27+
!CHECK-NEXT: omp.terminator
28+
!CHECK-NEXT: }
29+
30+
end subroutine omp_target_private
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
! Basic offloading test with a target region
2+
! REQUIRES: flang
3+
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
4+
! UNSUPPORTED: aarch64-unknown-linux-gnu
5+
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
6+
! UNSUPPORTED: x86_64-pc-linux-gnu
7+
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
8+
9+
! RUN: %libomptarget-compile-fortran-generic
10+
! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
11+
program target_update
12+
implicit none
13+
integer :: x(1)
14+
integer :: y(1)
15+
16+
x(1) = 42
17+
18+
!$omp target private(x) map(tofrom: y)
19+
x(1) = 84
20+
y(1) = x(1)
21+
!$omp end target
22+
23+
print *, "x =", x(1)
24+
print *, "y =", y(1)
25+
26+
end program target_update
27+
! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
28+
! CHECK: x = 42
29+
! CHECK: y = 84

0 commit comments

Comments
 (0)