Skip to content

[flang][cuda] Simplify data transfer when possible #106120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 32 additions & 13 deletions flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4251,15 +4251,37 @@ class FirConverter : public Fortran::lower::AbstractConverter {
bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);

auto getRefIfLoaded = [](mlir::Value val) -> mlir::Value {
// Map a data-transfer operand to the most direct reference available for it:
//  - a value produced by a fir.load yields the address that was loaded from;
//  - a box-typed value produced by an hlfir.declare that has a shape operand
//    and whose memref is a fir reference yields that memref;
//  - anything else is returned unchanged.
auto getRefFromValue = [](mlir::Value val) -> mlir::Value {
  auto *producer = val.getDefiningOp();
  if (auto load = mlir::dyn_cast_or_null<fir::LoadOp>(producer))
    return load.getMemref();
  if (mlir::isa<fir::BaseBoxType>(val.getType())) {
    auto declare = mlir::dyn_cast_or_null<hlfir::DeclareOp>(producer);
    // Only look through the declare when it carries a shape operand.
    if (declare && declare.getShape()) {
      mlir::Value memref = declare.getMemref();
      if (mlir::isa<fir::ReferenceType>(memref.getType()))
        return memref;
    }
  }
  return val;
};

mlir::Value rhsVal = getRefIfLoaded(rhs.getBase());
mlir::Value lhsVal = getRefIfLoaded(lhs.getBase());
// Return the shape operand of the hlfir.declare that defines `val` when
// `val` is box-typed. A null value is returned when `val` is not a box or
// is not defined by an hlfir.declare.
auto getShapeFromDecl = [](mlir::Value val) -> mlir::Value {
  mlir::Value shape;
  if (mlir::isa<fir::BaseBoxType>(val.getType()))
    if (auto declare =
            mlir::dyn_cast_or_null<hlfir::DeclareOp>(val.getDefiningOp()))
      shape = declare.getShape();
  return shape;
};

mlir::Value rhsVal = getRefFromValue(rhs.getBase());
mlir::Value lhsVal = getRefFromValue(lhs.getBase());
// Get the shape from the rhs if available; otherwise get it from the lhs.
mlir::Value shape = getShapeFromDecl(rhs.getBase());
if (!shape)
shape = getShapeFromDecl(lhs.getBase());

// device = host
if (lhsIsDevice && !rhsIsDevice) {
Expand All @@ -4272,19 +4294,18 @@ class FirConverter : public Fortran::lower::AbstractConverter {
base = convertOp.getValue();
// Special case if the rhs is a constant.
if (matchPattern(base.getDefiningOp(), mlir::m_Constant())) {
builder.create<cuf::DataTransferOp>(
loc, base, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, base, lhsVal, shape,
transferKindAttr);
} else {
auto associate = hlfir::genAssociateExpr(
loc, builder, rhs, rhs.getType(), ".cuf_host_tmp");
builder.create<cuf::DataTransferOp>(loc, associate.getBase(), lhsVal,
/*shape=*/mlir::Value{},
transferKindAttr);
shape, transferKindAttr);
builder.create<hlfir::EndAssociateOp>(loc, associate);
}
} else {
builder.create<cuf::DataTransferOp>(
loc, rhsVal, lhsVal, /*shape=*/mlir::Value{}, transferKindAttr);
builder.create<cuf::DataTransferOp>(loc, rhsVal, lhsVal, shape,
transferKindAttr);
}
return;
}
Expand All @@ -4293,8 +4314,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (!lhsIsDevice && rhsIsDevice) {
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::DeviceHost);
builder.create<cuf::DataTransferOp>(loc, rhsVal, lhsVal,
/*shape=*/mlir::Value{},
builder.create<cuf::DataTransferOp>(loc, rhsVal, lhsVal, shape,
transferKindAttr);
return;
}
Expand All @@ -4304,8 +4324,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
assert(rhs.isVariable() && "CUDA Fortran assignment rhs is not legal");
auto transferKindAttr = cuf::DataTransferKindAttr::get(
builder.getContext(), cuf::DataTransferKind::DeviceDevice);
builder.create<cuf::DataTransferOp>(loc, rhsVal, lhsVal,
/*shape=*/mlir::Value{},
builder.create<cuf::DataTransferOp>(loc, rhsVal, lhsVal, shape,
transferKindAttr);
return;
}
Expand Down
4 changes: 3 additions & 1 deletion flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,11 @@ llvm::LogicalResult cuf::DataTransferOp::verify() {
if (fir::isa_trivial(srcTy) &&
matchPattern(getSrc().getDefiningOp(), mlir::m_Constant()))
return mlir::success();

return emitOpError()
<< "expect src and dst to be references or descriptors or src to "
"be a constant";
"be a constant: "
<< srcTy << " - " << dstTy;
}

//===----------------------------------------------------------------------===//
Expand Down
19 changes: 17 additions & 2 deletions flang/test/Lower/CUDA/cuda-data-transfer.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ contains
! Integer function taking a device assumed-shape integer array. The body
! does not read `a`; the result is always 1.
function dev1(a)
integer, device :: a(:)
integer :: dev1
! Define the function result before returning.
dev1 = 1
end function
end

Expand Down Expand Up @@ -198,8 +199,8 @@ end subroutine
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "b"}, %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "n"})
! CHECK: %[[B:.*]]:2 = hlfir.declare %[[ARG1]](%{{.*}}) dummy_scope %{{.*}} {uniq_name = "_QFsub8Eb"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
! CHECK: cuf.data_transfer %[[A]]#0 to %[[B]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
! CHECK: cuf.data_transfer %[[ARG0]] to %[[B]]#0, %{{.*}} : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[B]]#0 to %[[ARG0]], %{{.*}} : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<?xi32>>

subroutine sub9(a)
integer, pinned, allocatable :: a(:)
Expand Down Expand Up @@ -274,3 +275,17 @@ end subroutine
! CHECK-LABEL: func.func @_QPsub14()
! CHECK: %[[TRUE:.*]] = arith.constant true
! CHECK: cuf.data_transfer %[[TRUE]] to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.array<10x!fir.logical<4>>>

! Whole-array assignment from a host array to a device array. Both dummies
! are explicit-shape with extent n*m, so the extent is only known at run time.
subroutine sub15(a_dev, a_host, n, m)
integer, intent(in) :: n, m
real, device :: a_dev(n*m)
real :: a_host(n*m)

! device = host
a_dev = a_host
end subroutine

! CHECK-LABEL: func.func @_QPsub15(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a_dev"}, %[[ARG1:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "a_host"}
! CHECK: %{{.*}} = fir.shape %{{.*}} : (index) -> !fir.shape<1>
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
! CHECK: cuf.data_transfer %[[ARG1]] to %[[ARG0]], %[[SHAPE]] : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>
Loading