Skip to content

[flang][OpenMP] Implement HAS_DEVICE_ADDR clause #128568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions flang/include/flang/Support/OpenMP-utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ struct EntryBlockArgsEntry {
/// Structure holding the information needed to create and bind entry block
/// arguments associated to all clauses that can define them.
struct EntryBlockArgs {
EntryBlockArgsEntry hasDeviceAddr;
llvm::ArrayRef<mlir::Value> hostEvalVars;
EntryBlockArgsEntry inReduction;
EntryBlockArgsEntry map;
Expand All @@ -44,21 +45,21 @@ struct EntryBlockArgs {
EntryBlockArgsEntry useDevicePtr;

bool isValid() const {
return inReduction.isValid() && map.isValid() && priv.isValid() &&
reduction.isValid() && taskReduction.isValid() &&
return hasDeviceAddr.isValid() && inReduction.isValid() && map.isValid() &&
priv.isValid() && reduction.isValid() && taskReduction.isValid() &&
useDeviceAddr.isValid() && useDevicePtr.isValid();
}

auto getSyms() const {
return llvm::concat<const semantics::Symbol *const>(inReduction.syms,
map.syms, priv.syms, reduction.syms, taskReduction.syms,
useDeviceAddr.syms, useDevicePtr.syms);
return llvm::concat<const semantics::Symbol *const>(hasDeviceAddr.syms,
inReduction.syms, map.syms, priv.syms, reduction.syms,
taskReduction.syms, useDeviceAddr.syms, useDevicePtr.syms);
}

auto getVars() const {
return llvm::concat<const mlir::Value>(hostEvalVars, inReduction.vars,
map.vars, priv.vars, reduction.vars, taskReduction.vars,
useDeviceAddr.vars, useDevicePtr.vars);
return llvm::concat<const mlir::Value>(hasDeviceAddr.vars, hostEvalVars,
inReduction.vars, map.vars, priv.vars, reduction.vars,
taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars);
}
};

Expand Down
34 changes: 27 additions & 7 deletions flang/lib/Lower/OpenMP/ClauseProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -913,14 +913,34 @@ bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const {
}

bool ClauseProcessor::processHasDeviceAddr(
mlir::omp::HasDeviceAddrClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const {
return findRepeatableClause<omp::clause::HasDeviceAddr>(
[&](const omp::clause::HasDeviceAddr &devAddrClause,
const parser::CharBlock &) {
addUseDeviceClause(converter, devAddrClause.v, result.hasDeviceAddrVars,
isDeviceSyms);
lower::StatementContext &stmtCtx, mlir::omp::HasDeviceAddrClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceSyms) const {
// For HAS_DEVICE_ADDR objects, implicitly map the top-level entities.
// Their address (or the whole descriptor, if the entity has one) will be
// passed to the target region.
std::map<Object, OmpMapParentAndMemberData> parentMemberIndices;
bool clauseFound = findRepeatableClause<omp::clause::HasDeviceAddr>(
[&](const omp::clause::HasDeviceAddr &clause,
const parser::CharBlock &source) {
mlir::Location location = converter.genLocation(source);
llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
omp::ObjectList baseObjects;
llvm::transform(clause.v, std::back_inserter(baseObjects),
[&](const omp::Object &object) {
if (auto maybeBase = getBaseObject(object, semaCtx))
return *maybeBase;
return object;
});
processMapObjects(stmtCtx, location, baseObjects, mapTypeBits,
parentMemberIndices, result.hasDeviceAddrVars,
hasDeviceSyms);
});

insertChildMapInfoIntoParent(converter, semaCtx, stmtCtx, parentMemberIndices,
result.hasDeviceAddrVars, hasDeviceSyms);
return clauseFound;
}

bool ClauseProcessor::processIf(
Expand Down
3 changes: 2 additions & 1 deletion flang/lib/Lower/OpenMP/ClauseProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ class ClauseProcessor {
bool processFinal(lower::StatementContext &stmtCtx,
mlir::omp::FinalClauseOps &result) const;
bool processHasDeviceAddr(
lower::StatementContext &stmtCtx,
mlir::omp::HasDeviceAddrClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &isDeviceSyms) const;
llvm::SmallVectorImpl<const semantics::Symbol *> &hasDeviceSyms) const;
bool processHint(mlir::omp::HintClauseOps &result) const;
bool processInclusive(mlir::Location currentLocation,
mlir::omp::InclusiveClauseOps &result) const;
Expand Down
4 changes: 2 additions & 2 deletions flang/lib/Lower/OpenMP/Clauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,8 @@ std::optional<Object> getBaseObject(const Object &object,
return Object{SymbolAndDesignatorExtractor::symbol_addr(comp->symbol()),
ea.Designate(evaluate::DataRef{
SymbolAndDesignatorExtractor::AsRvalueRef(*comp)})};
} else if (base.UnwrapSymbolRef()) {
return std::nullopt;
} else if (auto *symRef = base.UnwrapSymbolRef()) {
return Object{const_cast<semantics::Symbol *>(&**symRef), std::nullopt};
}
} else {
assert(std::holds_alternative<evaluate::CoarrayRef>(ref.u) &&
Expand Down
12 changes: 10 additions & 2 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ static void bindEntryBlockArgs(lower::AbstractConverter &converter,
// Process in clause name alphabetical order to match block arguments order.
// Do not bind host_eval variables because they cannot be used inside of the
// corresponding region, except for very specific cases handled separately.
bindMapLike(args.hasDeviceAddr.syms, op.getHasDeviceAddrBlockArgs());
bindPrivateLike(args.inReduction.syms, args.inReduction.vars,
op.getInReductionBlockArgs());
bindMapLike(args.map.syms, op.getMapBlockArgs());
Expand Down Expand Up @@ -1650,7 +1651,7 @@ static void genTargetClauses(
cp.processBare(clauseOps);
cp.processDepend(clauseOps);
cp.processDevice(stmtCtx, clauseOps);
cp.processHasDeviceAddr(clauseOps, hasDeviceAddrSyms);
cp.processHasDeviceAddr(stmtCtx, clauseOps, hasDeviceAddrSyms);
if (!hostEvalInfo.empty()) {
// Only process host_eval if compiling for the host device.
processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc);
Expand Down Expand Up @@ -2196,6 +2197,10 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
return;

// These symbols are mapped individually in processHasDeviceAddr.
if (llvm::is_contained(hasDeviceAddrSyms, &sym))
return;

// Structure component symbols don't have bindings, and can only be
// explicitly mapped individually. If a member is captured implicitly
// we map the entirety of the derived type when we find its symbol.
Expand Down Expand Up @@ -2286,10 +2291,13 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,

auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);

llvm::SmallVector<mlir::Value> mapBaseValues;
llvm::SmallVector<mlir::Value> hasDeviceAddrBaseValues, mapBaseValues;
extractMappedBaseValues(clauseOps.hasDeviceAddrVars, hasDeviceAddrBaseValues);
extractMappedBaseValues(clauseOps.mapVars, mapBaseValues);

EntryBlockArgs args;
args.hasDeviceAddr.syms = hasDeviceAddrSyms;
args.hasDeviceAddr.vars = hasDeviceAddrBaseValues;
args.hostEvalVars = clauseOps.hostEvalVars;
// TODO: Add in_reduction syms and vars.
args.map.syms = mapSyms;
Expand Down
62 changes: 48 additions & 14 deletions flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,19 @@ class MapInfoFinalizationPass
}

/// Adjusts the descriptor's map type. The main alteration that is done
/// currently is transforming the map type to `OMP_MAP_TO` where possible.
/// This is because we will always need to map the descriptor to device
/// (or at the very least it seems to be the case currently with the
/// current lowered kernel IR), as without the appropriate descriptor
/// currently is transforming the map type to `OMP_MAP_TO` where possible,
/// plus adding the OMP_MAP_ALWAYS flag. Descriptors will always be copied,
/// even if the object was listed on the `has_device_addr` clause.
/// This is because the descriptor can be rematerialized by the compiler,
/// and so the address of the descriptor for a given object at one place in
/// the code may differ from that address in another place. The contents
/// of the descriptor (the base address in particular) will remain unchanged
/// though. Non-descriptor objects listed on the `has_device_addr` clause
/// can be passed to the kernel by just passing their address without any
/// remapping.
/// The OMP_MAP_TO flag is added because we will always need to map the
/// descriptor to the device, especially without device address clauses,
/// as without the appropriate descriptor
/// information on the device there is a risk of the kernel IR
/// requesting for various data that will not have been copied to
/// perform things like indexing. This can cause segfaults and
Expand All @@ -247,16 +256,26 @@ class MapInfoFinalizationPass
mlir::omp::TargetUpdateOp>(target))
return mapTypeFlag;

bool hasImplicitMap =
(llvm::omp::OpenMPOffloadMappingFlags(mapTypeFlag) &
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT) ==
llvm::omp::OpenMPOffloadMappingFlags Implicit =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should likely be "implicit", with a small i, to align with the case style of everything!

llvm::omp::OpenMPOffloadMappingFlags(mapTypeFlag) &
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

return llvm::to_underlying(
hasImplicitMap
? llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
: llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | Implicit);
}

/// Returns true when `mapOp` is the defining operation of one of the
/// has_device_addr operands of `userOp`. Only `omp.target` operations are
/// inspected; for any other kind of user the result is always false.
/// `userOp` must not be null.
bool isHasDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation *userOp) {
  assert(userOp && "Expecting non-null argument");

  // Anything that is not a TargetOp cannot match.
  auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(userOp);
  if (!targetOp)
    return false;

  // Look for an operand of the clause that is produced by this map op.
  for (mlir::Value operand : targetOp.getHasDeviceAddrVars())
    if (operand.getDefiningOp() == mapOp)
      return true;

  return false;
}

mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
Expand All @@ -268,8 +287,7 @@ class MapInfoFinalizationPass
// TODO: map the addendum segment of the descriptor, similarly to the
// base address/data pointer member.
mlir::Value descriptor = getDescriptorFromBoxMap(op, builder);
auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
op.getMapType().value_or(0), builder);

mlir::ArrayAttr newMembersAttr;
mlir::SmallVector<mlir::Value> newMembers;
llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices;
Expand All @@ -286,6 +304,12 @@ class MapInfoFinalizationPass
// member information to now have one new member for the base address, or
// we are expanding a parent that is a descriptor and we have to adjust
// all of its members to reflect the insertion of the base address.
//
// If we're expanding a top-level descriptor for a map operation that
// resulted from a "has_device_addr" clause, then we want the base pointer
// from the descriptor to be used verbatim, i.e. without additional
// remapping. To avoid this remapping, simply don't generate any map
// information for the descriptor members.
if (!mapMemberUsers.empty()) {
// Currently, there should only be one user per map when this pass
// is executed. Either a parent map, holding the current map in its
Expand All @@ -296,6 +320,8 @@ class MapInfoFinalizationPass
assert(mapMemberUsers.size() == 1 &&
"OMPMapInfoFinalization currently only supports single users of a "
"MapInfoOp");
auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
op.getMapType().value_or(0), builder);
ParentAndPlacement mapUser = mapMemberUsers[0];
adjustMemberIndices(memberIndices, mapUser.index);
llvm::SmallVector<mlir::Value> newMemberOps;
Expand All @@ -307,7 +333,9 @@ class MapInfoFinalizationPass
mapUser.parent.getMembersMutable().assign(newMemberOps);
mapUser.parent.setMembersIndexAttr(
builder.create2DI64ArrayAttr(memberIndices));
} else {
} else if (!isHasDeviceAddr(op, target)) {
auto baseAddr = genBaseAddrMap(descriptor, op.getBounds(),
op.getMapType().value_or(0), builder);
newMembers.push_back(baseAddr);
if (!op.getMembers().empty()) {
for (auto &indices : memberIndices)
Expand Down Expand Up @@ -448,6 +476,12 @@ class MapInfoFinalizationPass
addOperands(useDevPtrMutableOpRange, target,
argIface.getUseDevicePtrBlockArgsStart() +
argIface.numUseDevicePtrBlockArgs());
} else if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target)) {
mlir::MutableOperandRange hasDevAddrMutableOpRange =
targetOp.getHasDeviceAddrVarsMutable();
addOperands(hasDevAddrMutableOpRange, target,
argIface.getHasDeviceAddrBlockArgsStart() +
argIface.numHasDeviceAddrBlockArgs());
}
}

Expand Down
10 changes: 6 additions & 4 deletions flang/lib/Support/OpenMP-utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ mlir::Block *genEntryBlock(mlir::OpBuilder &builder, const EntryBlockArgs &args,

llvm::SmallVector<mlir::Type> types;
llvm::SmallVector<mlir::Location> locs;
unsigned numVars = args.hostEvalVars.size() + args.inReduction.vars.size() +
args.map.vars.size() + args.priv.vars.size() +
args.reduction.vars.size() + args.taskReduction.vars.size() +
args.useDeviceAddr.vars.size() + args.useDevicePtr.vars.size();
unsigned numVars = args.hasDeviceAddr.vars.size() + args.hostEvalVars.size() +
args.inReduction.vars.size() + args.map.vars.size() +
args.priv.vars.size() + args.reduction.vars.size() +
args.taskReduction.vars.size() + args.useDeviceAddr.vars.size() +
args.useDevicePtr.vars.size();
types.reserve(numVars);
locs.reserve(numVars);

Expand All @@ -34,6 +35,7 @@ mlir::Block *genEntryBlock(mlir::OpBuilder &builder, const EntryBlockArgs &args,

// Populate block arguments in clause name alphabetical order to match
// expected order by the BlockArgOpenMPOpInterface.
extractTypeLoc(args.hasDeviceAddr.vars);
extractTypeLoc(args.hostEvalVars);
extractTypeLoc(args.inReduction.vars);
extractTypeLoc(args.map.vars);
Expand Down
18 changes: 9 additions & 9 deletions flang/test/Integration/OpenMP/map-types-and-sizes.f90
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ subroutine mapType_array
end subroutine mapType_array

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 24, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976711169, i64 281474976711171, i64 281474976711187]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976711173, i64 281474976711171, i64 281474976711187]
subroutine mapType_ptr
integer, pointer :: a
!$omp target
Expand All @@ -40,7 +40,7 @@ subroutine mapType_ptr
end subroutine mapType_ptr

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 24, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976711169, i64 281474976711171, i64 281474976711187]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976711173, i64 281474976711171, i64 281474976711187]
subroutine mapType_allocatable
integer, allocatable :: a
allocate(a)
Expand All @@ -51,7 +51,7 @@ subroutine mapType_allocatable
end subroutine mapType_allocatable

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 24, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
subroutine mapType_ptr_explicit
integer, pointer :: a
!$omp target map(tofrom: a)
Expand All @@ -60,7 +60,7 @@ subroutine mapType_ptr_explicit
end subroutine mapType_ptr_explicit

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 24, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
subroutine mapType_allocatable_explicit
integer, allocatable :: a
allocate(a)
Expand Down Expand Up @@ -212,7 +212,7 @@ subroutine mapType_derived_explicit_nested_member_with_bounds
end subroutine mapType_derived_explicit_nested_member_with_bounds

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
subroutine mapType_derived_type_alloca()
type :: one_layer
real(4) :: i
Expand All @@ -233,7 +233,7 @@ subroutine mapType_derived_type_alloca()
end subroutine

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675, i64 281474976710657, i64 281474976710659, i64 281474976710675, i64 281474976710659]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659]
subroutine mapType_alloca_derived_type()
type :: one_layer
real(4) :: i
Expand All @@ -256,7 +256,7 @@ subroutine mapType_alloca_derived_type()
end subroutine

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [8 x i64] [i64 0, i64 40, i64 8, i64 0, i64 48, i64 8, i64 0, i64 4]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675, i64 281474976710657, i64 281474976710659, i64 281474976710675, i64 281474976710659]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [8 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710661, i64 281474976710659, i64 281474976710675, i64 281474976710659]
subroutine mapType_alloca_nested_derived_type()
type :: middle_layer
real(4) :: i
Expand Down Expand Up @@ -287,7 +287,7 @@ subroutine mapType_alloca_nested_derived_type()
end subroutine

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710657, i64 281474976710659, i64 281474976710675]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710661, i64 281474976710659, i64 281474976710675]
subroutine mapType_nested_derived_type_alloca()
type :: middle_layer
real(4) :: i
Expand Down Expand Up @@ -316,7 +316,7 @@ subroutine mapType_nested_derived_type_alloca()
end subroutine

!CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [7 x i64] [i64 0, i64 64, i64 8, i64 0, i64 48, i64 8, i64 0]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976710657, i64 281474976710656, i64 281474976710672, i64 281474976710657, i64 281474976710659, i64 281474976710675]
!CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [7 x i64] [i64 32, i64 281474976710661, i64 281474976710656, i64 281474976710672, i64 281474976710661, i64 281474976710659, i64 281474976710675]
subroutine mapType_nested_derived_type_member_idx()
type :: vertexes
integer :: test
Expand Down
Loading