|
| 1 | +//==----------- bf1oat16 devicelib dlopen test for SYCL JIT ----------------==// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | + |
| 9 | +// The case uses dlopen/close to load/unload a sycl shared library which |
| 10 | +// depends bfloat16 device library and the main function also includes sycl |
| 11 | +// kernels which depend on bfloat16 device library. SYCL program manager will |
| 12 | +// own the bfloat16 device library image which is shared by all kernels using |
| 13 | +// bfloat16 features, so the program should also work well when the shared |
| 14 | +// library is dlclosed and the device images are removed. |
| 15 | + |
| 16 | +// REQUIRES: linux |
| 17 | + |
| 18 | +// RUN: %{build} -DBUILD_LIB -fPIC -shared -o %T/lib%basename_t.so |
| 19 | + |
| 20 | +// RUN: %{build} -DFNAME=%basename_t -ldl -Wl,-rpath=%T -o %t1.out |
| 21 | + |
| 22 | +// RUN: %{run} %t1.out |
| 23 | + |
| 24 | +// UNSUPPORTED: target-nvidia || target-amd |
| 25 | +// UNSUPPORTED-INTENDED: bfloat16 device library is not used on AMD and Nvidia. |
| 26 | + |
| 27 | +#include <sycl/detail/core.hpp> |
| 28 | +#include <sycl/ext/oneapi/bfloat16.hpp> |
| 29 | +#include <sycl/kernel_bundle.hpp> |
| 30 | + |
| 31 | +#include <dlfcn.h> |
| 32 | +#include <iostream> |
| 33 | + |
| 34 | +using namespace sycl; |
| 35 | + |
| 36 | +constexpr access::mode sycl_read = access::mode::read; |
| 37 | +constexpr access::mode sycl_write = access::mode::write; |
| 38 | + |
| 39 | +using BFP = sycl::ext::oneapi::bfloat16; |
| 40 | + |
| 41 | +#ifdef BUILD_LIB |
| 42 | +class FOO_KERN; |
| 43 | +void foo() { |
| 44 | + queue deviceQueue; |
| 45 | + BFP bf16_v; |
| 46 | + float fp32_v = 16.5f; |
| 47 | + { |
| 48 | + buffer<float, 1> fp32_buffer{&fp32_v, 1}; |
| 49 | + buffer<BFP, 1> bf16_buffer{&bf16_v, 1}; |
| 50 | + deviceQueue |
| 51 | + .submit([&](handler &cgh) { |
| 52 | + auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh); |
| 53 | + auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh); |
| 54 | + cgh.single_task<FOO_KERN>([=]() { bf16_acc[0] = BFP{fp32_acc[0]}; }); |
| 55 | + }) |
| 56 | + .wait(); |
| 57 | + } |
| 58 | + std::cout << "In foo: " << bf16_v << std::endl; |
| 59 | +} |
| 60 | +#else |
| 61 | + |
| 62 | +class MAINRUN; |
| 63 | +void main_run(queue &deviceQueue) { |
| 64 | + BFP bf16_v; |
| 65 | + float fp32_v = 16.5f; |
| 66 | + { |
| 67 | + buffer<float, 1> fp32_buffer{&fp32_v, 1}; |
| 68 | + buffer<BFP, 1> bf16_buffer{&bf16_v, 1}; |
| 69 | + deviceQueue |
| 70 | + .submit([&](handler &cgh) { |
| 71 | + auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh); |
| 72 | + auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh); |
| 73 | + cgh.single_task<class MAINRUN>( |
| 74 | + [=]() { bf16_acc[0] = BFP{fp32_acc[0] + 0.5f}; }); |
| 75 | + }) |
| 76 | + .wait(); |
| 77 | + } |
| 78 | + std::cout << "In run: " << bf16_v << std::endl; |
| 79 | +} |
| 80 | + |
| 81 | +#define STRINGIFY_HELPER(A) #A |
| 82 | +#define STRINGIFY(A) STRINGIFY_HELPER(A) |
| 83 | +#define SO_FNAME "lib" STRINGIFY(FNAME) ".so" |
| 84 | + |
| 85 | +int main() { |
| 86 | + BFP bf16_array[3]; |
| 87 | + float fp32_array[3] = {7.0f, 8.5f, 0.5f}; |
| 88 | + queue deviceQueue; |
| 89 | + std::vector<sycl::kernel_id> all_kernel_ids; |
| 90 | + bool dynlib_kernel_available = false; |
| 91 | + bool dynlib_kernel_unavailable = true; |
| 92 | + main_run(deviceQueue); |
| 93 | + |
| 94 | + void *handle = dlopen(SO_FNAME, RTLD_LAZY); |
| 95 | + void (*func)(); |
| 96 | + *(void **)(&func) = dlsym(handle, "_Z3foov"); |
| 97 | + func(); |
| 98 | + all_kernel_ids = sycl::get_kernel_ids(); |
| 99 | + for (auto k : all_kernel_ids) { |
| 100 | + if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN")) |
| 101 | + dynlib_kernel_available = true; |
| 102 | + } |
| 103 | + |
| 104 | + // Before dlclose, the FOO_KERN from sycl dynamic library must exist. |
| 105 | + assert(dynlib_kernel_available); |
| 106 | + |
| 107 | + dlclose(handle); |
| 108 | + |
| 109 | + all_kernel_ids = sycl::get_kernel_ids(); |
| 110 | + for (auto k : all_kernel_ids) { |
| 111 | + if (k.get_name() && std::strstr(k.get_name(), "FOO_KERN")) |
| 112 | + dynlib_kernel_unavailable = false; |
| 113 | + } |
| 114 | + |
| 115 | + assert(dynlib_kernel_unavailable); |
| 116 | + |
| 117 | + { |
| 118 | + buffer<float, 1> fp32_buffer{fp32_array, 3}; |
| 119 | + buffer<BFP, 1> bf16_buffer{bf16_array, 3}; |
| 120 | + deviceQueue |
| 121 | + .submit([&](handler &cgh) { |
| 122 | + auto fp32_acc = fp32_buffer.get_access<sycl_read>(cgh); |
| 123 | + auto bf16_acc = bf16_buffer.get_access<sycl_write>(cgh); |
| 124 | + cgh.single_task([=]() { |
| 125 | + bf16_acc[0] = BFP{fp32_acc[0]}; |
| 126 | + bf16_acc[1] = BFP{fp32_acc[1]}; |
| 127 | + bf16_acc[2] = BFP{fp32_acc[2]}; |
| 128 | + }); |
| 129 | + }) |
| 130 | + .wait(); |
| 131 | + } |
| 132 | + std::cout << "In main: " << bf16_array[0] << " " << bf16_array[1] << " " |
| 133 | + << bf16_array[2] << std::endl; |
| 134 | + |
| 135 | + return 0; |
| 136 | +} |
| 137 | +#endif |
0 commit comments