Skip to content

Commit 1417633

Browse files
authored
[flang][cuda] Add CUF allocator (#101216)
Add allocators for CUDA Fortran allocation on the device. Three allocators are added for pinned, device and managed/unified memory allocation. `CUFRegisterAllocator()` is called to register the allocators in the allocator registry added in #100690. Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to conditionally build these.
1 parent d6649f2 commit 1417633

File tree

8 files changed

+238
-0
lines changed

8 files changed

+238
-0
lines changed

flang/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
461461
if (FLANG_BUILD_TOOLS)
462462
add_subdirectory(tools)
463463
endif()
464+
465+
# Opt-in switch (OFF by default) for compiling the CUDA Fortran runtime
# sources.
option(FLANG_CUF_RUNTIME
466+
"Compile CUDA Fortran runtime sources" OFF)
467+
# When the switch is on, the CUDA toolkit becomes a hard requirement of the
# configuration step.
if (FLANG_CUF_RUNTIME)
468+
find_package(CUDAToolkit REQUIRED)
469+
endif()
470+
464471
add_subdirectory(runtime)
465472

466473
if (LLVM_INCLUDE_EXAMPLES)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
10+
#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
11+
12+
#include "flang/Runtime/descriptor.h"
13+
14+
// Positions under which the CUDA Fortran allocators are registered in the
// runtime allocator registry (see CUFRegisterAllocator).
// They are `inline` so that every translation unit including this header
// shares a single definition instead of getting its own internal copy.
inline constexpr unsigned kPinnedAllocatorPos = 1;
inline constexpr unsigned kDeviceAllocatorPos = 2;
inline constexpr unsigned kManagedAllocatorPos = 3;
17+
18+
// Evaluates `expr`, which must yield a CUresult, and returns silently when the
// result is zero. For any other result the program is terminated through the
// runtime Terminator; the crash message contains the stringified expression
// and the name obtained from cuGetErrorName ("<unknown>" when no name is
// provided).
// Terminator is fully qualified so that the macro does not depend on the
// namespace or using-directives in effect where it is expanded.
#define CUDA_REPORT_IF_ERROR(expr) \
  [](CUresult result) { \
    if (!result) \
      return; \
    const char *name = nullptr; \
    cuGetErrorName(result, &name); \
    if (!name) \
      name = "<unknown>"; \
    Fortran::runtime::Terminator terminator{__FILE__, __LINE__}; \
    terminator.Crash("'%s' failed with '%s'", #expr, name); \
  }(expr)
29+
30+
namespace Fortran::runtime::cuf {

/// Registers the pinned, device and managed allocate/free pairs declared
/// below in the runtime allocator registry.
void CUFRegisterAllocator();

/// Pinned memory: allocate `sizeInBytes` bytes / release `p`.
void *CUFAllocPinned(std::size_t sizeInBytes);
void CUFFreePinned(void *p);

/// Device memory: allocate `sizeInBytes` bytes / release `p`.
void *CUFAllocDevice(std::size_t sizeInBytes);
void CUFFreeDevice(void *p);

/// Managed memory: allocate `sizeInBytes` bytes / release `p`.
void *CUFAllocManaged(std::size_t sizeInBytes);
void CUFFreeManaged(void *p);

} // namespace Fortran::runtime::cuf
44+
#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_

flang/runtime/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
309309
add_dependencies(FortranRuntime flang-new module_files)
310310
endif()
311311

312+
# Descend into the CUDA runtime directory only when the CUDA Fortran runtime
# has been enabled through FLANG_CUF_RUNTIME.
if (FLANG_CUF_RUNTIME)
313+
add_subdirectory(CUDA)
314+
endif()

flang/runtime/CUDA/CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
2+
#
3+
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
# See https://llvm.org/LICENSE.txt for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
#===------------------------------------------------------------------------===#
8+
9+
# Expose the CUDA toolkit headers to the sources compiled in this directory.
include_directories(${CUDAToolkit_INCLUDE_DIRS})
10+
# Locate the library named `cuda`; configuration fails if it is not found.
# NOTE(review): the variable is called CUDA_RUNTIME_LIBRARY, but the library
# searched for is `cuda`, presumably the driver API library that provides the
# cu* entry points used by allocator.cpp -- confirm the naming is intended.
find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
11+
12+
# Library built from the CUDA Fortran allocator source.
add_flang_library(CufRuntime
13+
allocator.cpp
14+
)
15+
# Link against the Fortran runtime and the CUDA library located above.
target_link_libraries(CufRuntime
16+
PRIVATE
17+
FortranRuntime
18+
${CUDA_RUNTIME_LIBRARY}
19+
)

flang/runtime/CUDA/allocator.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "flang/Runtime/CUDA/allocator.h"
10+
#include "../allocator-registry.h"
11+
#include "../derived.h"
12+
#include "../stat.h"
13+
#include "../terminator.h"
14+
#include "../type-info.h"
15+
#include "flang/Common/Fortran.h"
16+
#include "flang/ISO_Fortran_binding_wrapper.h"
17+
18+
#include "cuda.h"
19+
20+
namespace Fortran::runtime::cuf {
21+
22+
// Registers the three CUDA Fortran allocate/free pairs in the runtime
// allocator registry, each under its dedicated position constant.
void CUFRegisterAllocator() {
  // Pinned memory.
  allocatorRegistry.Register(
      kPinnedAllocatorPos, {&CUFAllocPinned, &CUFFreePinned});
  // Device memory.
  allocatorRegistry.Register(
      kDeviceAllocatorPos, {&CUFAllocDevice, &CUFFreeDevice});
  // Managed memory.
  allocatorRegistry.Register(
      kManagedAllocatorPos, {&CUFAllocManaged, &CUFFreeManaged});
}
30+
31+
// Allocates `sizeInBytes` bytes with cuMemAllocHost and returns the resulting
// pointer. A non-zero CUDA result terminates the program through
// CUDA_REPORT_IF_ERROR.
void *CUFAllocPinned(std::size_t sizeInBytes) {
  // Start from a known value, as the device and managed allocators do, so the
  // pointer is never read while indeterminate.
  void *p{nullptr};
  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
  return p;
}
36+
37+
// Releases `p` with cuMemFreeHost; a non-zero CUDA result terminates the
// program through CUDA_REPORT_IF_ERROR.
void CUFFreePinned(void *p) {
  CUDA_REPORT_IF_ERROR(cuMemFreeHost(p));
}
38+
39+
// Allocates `sizeInBytes` bytes with cuMemAlloc and returns the CUdeviceptr
// handle reinterpreted as a `void *`. A non-zero CUDA result terminates the
// program through CUDA_REPORT_IF_ERROR.
void *CUFAllocDevice(std::size_t sizeInBytes) {
  CUdeviceptr p{0};
  CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
  // The registry works with plain pointers, so hand the handle back as one.
  return reinterpret_cast<void *>(p);
}
44+
45+
// Reinterprets `p` as a CUdeviceptr and releases it with cuMemFree; a non-zero
// CUDA result terminates the program through CUDA_REPORT_IF_ERROR.
void CUFFreeDevice(void *p) {
  CUDA_REPORT_IF_ERROR(
      cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}
48+
49+
// Allocates `sizeInBytes` bytes with cuMemAllocManaged, passing the
// CU_MEM_ATTACH_GLOBAL flag, and returns the CUdeviceptr handle reinterpreted
// as a `void *`. A non-zero CUDA result terminates the program through
// CUDA_REPORT_IF_ERROR.
void *CUFAllocManaged(std::size_t sizeInBytes) {
  CUdeviceptr p{0};
  CUDA_REPORT_IF_ERROR(cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
  return reinterpret_cast<void *>(p);
}
55+
56+
// Reinterprets `p` as a CUdeviceptr and releases it with cuMemFree, the same
// call the device allocator uses; a non-zero CUDA result terminates the
// program through CUDA_REPORT_IF_ERROR.
void CUFFreeManaged(void *p) {
  CUDA_REPORT_IF_ERROR(
      cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}
59+
60+
} // namespace Fortran::runtime::cuf

flang/unittests/Runtime/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests
3535
PRIVATE
3636
FortranRuntime
3737
)
38+
39+
# Added unconditionally here.
# NOTE(review): this presumably relies on the CUDA subdirectory's own
# CMakeLists.txt guarding its contents with FLANG_CUF_RUNTIME -- confirm.
add_subdirectory(CUDA)
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
//===-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp ------------*- C++-*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "gtest/gtest.h"
10+
#include "../../../runtime/terminator.h"
11+
#include "flang/Common/Fortran.h"
12+
#include "flang/Runtime/CUDA/allocator.h"
13+
#include "flang/Runtime/allocatable.h"
14+
15+
#include "cuda.h"
16+
17+
using namespace Fortran::runtime;
18+
19+
// Test helper: creates a descriptor through Descriptor::Create for the given
// type category and kind, with the requested rank (1 by default) and the
// CFI_attribute_allocatable attribute. `kind` is used both for the type code
// and as the second argument of Descriptor::Create.
static OwningPtr<Descriptor> createAllocatable(
    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
  const TypeCode typeCode{tc, kind};
  return Descriptor::Create(
      typeCode, kind, nullptr, rank, nullptr, CFI_attribute_allocatable);
}
24+
25+
thread_local static int32_t defaultDevice = 0;
26+
27+
CUdevice getDefaultCuDevice() {
28+
CUdevice device;
29+
CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
30+
return device;
31+
}
32+
33+
class ScopedContext {
34+
public:
35+
ScopedContext() {
36+
// Static reference to CUDA primary context for device ordinal
37+
// defaultDevice.
38+
static CUcontext context = [] {
39+
CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
40+
CUcontext ctx;
41+
// Note: this does not affect the current context.
42+
CUDA_REPORT_IF_ERROR(
43+
cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
44+
return ctx;
45+
}();
46+
47+
CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
48+
}
49+
50+
~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
51+
};
52+
53+
// Selects the device allocator on an allocatable REAL(4) descriptor, then
// allocates and deallocates it, checking the allocation status after each
// step.
TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
  using Fortran::common::TypeCategory;
  Fortran::runtime::cuf::CUFRegisterAllocator();
  // Keep a CUDA context current for the duration of the test.
  ScopedContext ctx;
  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
  auto a{createAllocatable(TypeCategory::Real, 4)};
  a->SetAllocIdx(kDeviceAllocatorPos);
  EXPECT_EQ(static_cast<int>(kDeviceAllocatorPos), a->GetAllocIdx());
  EXPECT_FALSE(a->HasAddendum());
  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
  RTNAME(AllocatableAllocate)
  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_TRUE(a->IsAllocated());
  RTNAME(AllocatableDeallocate)
  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_FALSE(a->IsAllocated());
}
70+
71+
// Selects the pinned allocator on an allocatable INTEGER(4) descriptor, then
// allocates and deallocates it, checking the allocation status after each
// step.
TEST(AllocatableCUFTest, SimplePinnedAllocate) {
  using Fortran::common::TypeCategory;
  Fortran::runtime::cuf::CUFRegisterAllocator();
  // Keep a CUDA context current for the duration of the test.
  ScopedContext ctx;
  // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
  auto a{createAllocatable(TypeCategory::Integer, 4)};
  // HasAddendum() is checked both before and after the allocator index is set.
  EXPECT_FALSE(a->HasAddendum());
  a->SetAllocIdx(kPinnedAllocatorPos);
  EXPECT_EQ(static_cast<int>(kPinnedAllocatorPos), a->GetAllocIdx());
  EXPECT_FALSE(a->HasAddendum());
  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
  RTNAME(AllocatableAllocate)
  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_TRUE(a->IsAllocated());
  RTNAME(AllocatableDeallocate)
  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
  EXPECT_FALSE(a->IsAllocated());
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# The CUDA Fortran runtime unit tests are only configured when the CUDA
# Fortran runtime itself is enabled.
if (FLANG_CUF_RUNTIME)
2+
3+
# Test executable built from the allocator test source.
add_flang_unittest(FlangCufRuntimeTests
4+
AllocatorCUF.cpp
5+
)
6+
7+
# Link against the CUDA Fortran allocator library and the Fortran runtime.
target_link_libraries(FlangCufRuntimeTests
8+
PRIVATE
9+
CufRuntime
10+
FortranRuntime
11+
)
12+
13+
# The test source includes cuda.h, so it needs the toolkit include directories.
target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
14+
15+
endif()

0 commit comments

Comments
 (0)