Skip to content

Commit ded6f32

Browse files
authored
[SYCL] Test bfloat16 devicelib with dlopen/dlclose (#18106)
Add test to cover bfloat16 devicelib usage in dlopen/dlclose scenario. --------- Signed-off-by: jinge90 <[email protected]>
1 parent 205449a commit ded6f32

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
//==----------- bf1oat16 devicelib dlopen test for SYCL JIT ----------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// The case uses dlopen/close to load/unload a sycl shared library which
10+
// depends bfloat16 device library and the main function also includes sycl
11+
// kernels which depend on bfloat16 device library. SYCL program manager will
12+
// own the bfloat16 device library image which is shared by all kernels using
13+
// bfloat16 features, so the program should also work well when the shared
14+
// library is dlclosed and the device images are removed.
15+
16+
// REQUIRES: linux
17+
18+
// RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so
19+
20+
// RUN: %{build} -DFNAME=%basename_t -ldl -Wl,-rpath=%T -o %t1.out
21+
22+
// RUN: %{run} %t1.out
23+
24+
// UNSUPPORTED: target-nvidia || target-amd
25+
// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia.
26+
27+
#include <sycl/detail/core.hpp>
28+
#include <sycl/ext/oneapi/bfloat16.hpp>
29+
#include <sycl/kernel_bundle.hpp>
30+
31+
#include <dlfcn.h>
32+
#include <iostream>
33+
34+
using namespace sycl;
35+
36+
constexpr access::mode sycl_read = access::mode::read;
37+
constexpr access::mode sycl_write = access::mode::write;
38+
39+
using BFP = sycl::ext::oneapi::bfloat16;
40+
41+
#ifdef BUILD_LIB
42+
class FOO_KERN;
43+
void foo() {
44+
queue deviceQueue;
45+
BFP bf16_v;
46+
float fp32_v = 16.5f;
47+
{
48+
buffer<float, 1> fp32_buffer{&fp32_v, 1};
49+
buffer<BFP, 1> bf16_buffer{&bf16_v, 1};
50+
deviceQueue
51+
.submit([&](handler &cgh) {
52+
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
53+
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
54+
cgh.single_task<FOO_KERN>([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; });
55+
})
56+
.wait();
57+
}
58+
std::cout << "In foo: " << bf16_v << std::endl;
59+
}
60+
#else
61+
62+
class MAINRUN;
63+
void main_run(queue &deviceQueue) {
64+
BFP bf16_v;
65+
float fp32_v = 16.5f;
66+
{
67+
buffer<float, 1> fp32_buffer{&fp32_v, 1};
68+
buffer<BFP, 1> bf16_buffer{&bf16_v, 1};
69+
deviceQueue
70+
.submit([&](handler &cgh) {
71+
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
72+
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
73+
cgh.single_task<class MAINRUN>(
74+
[=]() { bf16_acc[0] = BFP{fp32_acc[0] + 0.5f}; });
75+
})
76+
.wait();
77+
}
78+
std::cout << "In run: " << bf16_v << std::endl;
79+
}
80+
81+
#define STRINGIFY_HELPER(A) #A
82+
#define STRINGIFY(A) STRINGIFY_HELPER(A)
83+
#define SO_FNAME "lib" STRINGIFY(FNAME) ".so"
84+
85+
int main() {
86+
BFP bf16_array[3];
87+
float fp32_array[3] = {7.0f, 8.5f, 0.5f};
88+
queue deviceQueue;
89+
std::vector<sycl::kernel_id> all_kernel_ids;
90+
bool dynlib_kernel_available = false;
91+
bool dynlib_kernel_unavailable = true;
92+
main_run(deviceQueue);
93+
94+
void *handle = dlopen(SO_FNAME, RTLD_LAZY);
95+
void (*func)();
96+
*(void **)(&func) = dlsym(handle, "_Z3foov");
97+
func();
98+
all_kernel_ids = sycl::get_kernel_ids();
99+
for (auto k : all_kernel_ids) {
100+
if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN"))
101+
dynlib_kernel_available = true;
102+
}
103+
104+
// Before dlclose, the FOO_KERN from sycl dynamic library must exist.
105+
assert(dynlib_kernel_available);
106+
107+
dlclose(handle);
108+
109+
all_kernel_ids = sycl::get_kernel_ids();
110+
for (auto k : all_kernel_ids) {
111+
if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN"))
112+
dynlib_kernel_unavailable = false;
113+
}
114+
115+
assert(dynlib_kernel_unavailable);
116+
117+
{
118+
buffer<float, 1> fp32_buffer{fp32_array, 3};
119+
buffer<BFP, 1> bf16_buffer{bf16_array, 3};
120+
deviceQueue
121+
.submit([&](handler &cgh) {
122+
auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh);
123+
auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh);
124+
cgh.single_task([=]() {
125+
bf16_acc[0] = BFP{fp32_acc[0]};
126+
bf16_acc[1] = BFP{fp32_acc[1]};
127+
bf16_acc[2] = BFP{fp32_acc[2]};
128+
});
129+
})
130+
.wait();
131+
}
132+
std::cout << "In main: " << bf16_array[0] << " " << bf16_array[1] << " "
133+
<< bf16_array[2] << std::endl;
134+
135+
return 0;
136+
}
137+
#endif

0 commit comments

Comments
 (0)