Skip to content

Commit 8e8dccd

Browse files
authored
[flang][cuda] Do not consider PINNED as device attribute (#95988)
PINNED is a CUDA data attribute meant for host variables. Do not count it when computing the number of device variables in an assignment for CUDA data transfer.
1 parent f991ebb commit 8e8dccd

File tree

4 files changed

+45
-14
lines changed

4 files changed

+45
-14
lines changed

flang/include/flang/Evaluate/tools.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,12 +1231,13 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
12311231
const std::string &argName);
12321232

12331233
// Get the number of distinct symbols in the expression with a CUDA device attribute (any CUDA data attribute other than PINNED).
1234-
template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
1234+
template <typename A> inline int GetNbOfCUDADeviceSymbols(const A &expr) {
12351235
semantics::UnorderedSymbolSet symbols;
12361236
for (const Symbol &sym : CollectSymbols(expr)) {
12371237
if (const auto *details =
12381238
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
1239-
if (details->cudaDataAttr()) {
1239+
if (details->cudaDataAttr() &&
1240+
*details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
12401241
symbols.insert(sym);
12411242
}
12421243
}
@@ -1246,8 +1247,8 @@ template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
12461247

12471248
// Check if any of the symbols that are part of the expression has a CUDA
12481249
// device data attribute (any CUDA data attribute other than PINNED).
1249-
template <typename A> inline bool HasCUDAAttrs(const A &expr) {
1250-
return GetNbOfCUDASymbols(expr) > 0;
1250+
template <typename A> inline bool HasCUDADeviceAttrs(const A &expr) {
1251+
return GetNbOfCUDADeviceSymbols(expr) > 0;
12511252
}
12521253

12531254
/// Check if the expression is a mix of host and device variables that require
@@ -1258,7 +1259,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
12581259
for (const Symbol &sym : CollectSymbols(expr)) {
12591260
if (const auto *details =
12601261
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
1261-
if (details->cudaDataAttr()) {
1262+
if (details->cudaDataAttr() &&
1263+
*details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
12621264
++deviceSymbols;
12631265
} else {
12641266
if (sym.owner().IsDerivedType()) {
@@ -1267,7 +1269,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
12671269
.GetSymbol()
12681270
->GetUltimate()
12691271
.detailsIf<semantics::ObjectEntityDetails>()) {
1270-
if (details->cudaDataAttr()) {
1272+
if (details->cudaDataAttr() &&
1273+
*details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
12711274
++deviceSymbols;
12721275
}
12731276
}

flang/lib/Lower/Bridge.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4107,8 +4107,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
41074107
void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
41084108
const Fortran::evaluate::Assignment &assign,
41094109
hlfir::Entity &lhs, hlfir::Entity &rhs) {
4110-
bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs);
4111-
bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs);
4110+
bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
4111+
bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);
41124112

41134113
auto getRefIfLoaded = [](mlir::Value val) -> mlir::Value {
41144114
if (auto loadOp =
@@ -4177,7 +4177,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
41774177
if (const auto *details =
41784178
sym.GetUltimate()
41794179
.detailsIf<Fortran::semantics::ObjectEntityDetails>()) {
4180-
if (details->cudaDataAttr()) {
4180+
if (details->cudaDataAttr() &&
4181+
*details->cudaDataAttr() != Fortran::common::CUDADataAttr::Pinned) {
41814182
if (sym.owner().IsDerivedType() && IsAllocatable(sym.GetUltimate()))
41824183
TODO(loc, "Device resident allocatable derived-type component");
41834184
// TODO: This should probably be checked in semantics and give a
@@ -4229,8 +4230,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
42294230
fir::FirOpBuilder &builder = getFirOpBuilder();
42304231

42314232
bool isInDeviceContext = isDeviceContext(builder);
4232-
bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
4233-
Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
4233+
bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
4234+
Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
42344235
!isInDeviceContext;
42354236
bool hasCUDAImplicitTransfer =
42364237
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);

flang/lib/Semantics/check-cuda.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -548,8 +548,8 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
548548
return;
549549
}
550550

551-
int nbLhs{evaluate::GetNbOfCUDASymbols(assign->lhs)};
552-
int nbRhs{evaluate::GetNbOfCUDASymbols(assign->rhs)};
551+
int nbLhs{evaluate::GetNbOfCUDADeviceSymbols(assign->lhs)};
552+
int nbRhs{evaluate::GetNbOfCUDADeviceSymbols(assign->rhs)};
553553

554554
// device to host transfer with more than one device object on the rhs is not
555555
// legal.

flang/test/Lower/CUDA/cuda-data-transfer.cuf

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ end subroutine
180180
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
181181
! CHECK: cuf.data_transfer %[[A]]#0 to %[[C]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
182182

183-
184183
subroutine sub8(a, b, n)
185184
integer :: n
186185
integer, device :: a(n)
@@ -195,3 +194,31 @@ end subroutine
195194
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
196195
! CHECK: cuf.data_transfer %[[A]]#0 to %[[B]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
197196
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
197+
198+
subroutine sub9(a)
199+
integer, pinned, allocatable :: a(:)
200+
do concurrent (i = 1 : 10)
201+
a(i) = 10 + a(i)
202+
end do
203+
end subroutine
204+
205+
! CHECK-LABEL: func.func @_QPsub9
206+
! CHECK-NOT: cuf.data_transfer
207+
208+
subroutine sub10(a, b)
209+
integer, device :: a
210+
integer, allocatable, pinned :: b
211+
integer :: res
212+
213+
res = a + b
214+
end subroutine
215+
216+
217+
218+
! CHECK-LABEL: func.func @_QPsub10(
219+
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}
220+
221+
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub10Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
222+
! CHECK: cuf.data_transfer %[[A]]#1 to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
223+
! CHECK-NOT: cuf.data_transfer
224+

0 commit comments

Comments
 (0)