Skip to content

[OpenMP][libomptarget] Enable lazy device initialization #76832

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions openmp/libomptarget/include/PluginManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ struct PluginAdaptorTy {
/// Return the number of devices visible to the underlying plugin.
int32_t getNumberOfPluginDevices() const { return NumberOfPluginDevices; }

/// Return the number of devices successfully initialized and visible to the
/// user.
int32_t getNumberOfUserDevices() const { return NumberOfUserDevices; }

/// Add all offload entries described by \p DI to the devices managed by this
/// plugin.
void addOffloadEntries(DeviceImageTy &DI);
Expand All @@ -82,6 +78,8 @@ struct PluginAdaptorTy {

llvm::DenseSet<const __tgt_device_image *> UsedImages;

bool LazyDeviceInitialization;

private:
/// Number of devices the underling plugins sees.
int32_t NumberOfPluginDevices = -1;
Expand All @@ -108,6 +106,9 @@ struct PluginManager {
/// Exclusive accessor type for the device container.
using ExclusiveDevicesAccessorTy = Accessor<DeviceContainerTy>;

/// Keep track of the number of initialized devices:
int32_t NumberOfInitializedDevices = 0;

PluginManager() {}

void init();
Expand All @@ -124,7 +125,8 @@ struct PluginManager {

/// Return the device presented to the user as device \p DeviceNo if it is
/// initialized and ready. Otherwise return an error explaining the problem.
llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo);
llvm::Expected<DeviceTy &> getDevice(uint32_t DeviceNo,
bool WithoutInit = false);

/// Iterate over all initialized and ready devices registered with this
/// plugin.
Expand Down
2 changes: 2 additions & 0 deletions openmp/libomptarget/include/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ struct DeviceTy {
PluginAdaptorTy *RTL;
int32_t RTLDeviceID;

bool IsInit;

bool HasMappedGlobalData = false;

PendingCtorsDtorsPerLibrary PendingCtorsDtors;
Expand Down
62 changes: 47 additions & 15 deletions openmp/libomptarget/src/PluginManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ PluginAdaptorTy::create(const std::string &Name) {

PluginAdaptorTy::PluginAdaptorTy(const std::string &Name,
std::unique_ptr<llvm::sys::DynamicLibrary> DL)
: Name(Name), LibraryHandler(std::move(DL)) {}
: Name(Name), LibraryHandler(std::move(DL)),
LazyDeviceInitialization(false) {}

Error PluginAdaptorTy::init() {

Expand Down Expand Up @@ -84,14 +85,19 @@ Error PluginAdaptorTy::init() {
"No devices supported in this RTL\n");
}

if (char *EnvStr = getenv("LIBOMPTARGET_LAZY_DEVICE_INIT")) {
LazyDeviceInitialization = std::stoi(EnvStr);
DP("Using lazy device initialization!\n");
}

DP("Registered '%s' with %d plugin visible devices!\n", Name.c_str(),
NumberOfPluginDevices);
return Error::success();
}

void PluginAdaptorTy::addOffloadEntries(DeviceImageTy &DI) {
for (int32_t I = 0, E = getNumberOfUserDevices(); I < E; ++I) {
auto DeviceOrErr = PM->getDevice(DeviceOffset + I);
for (int32_t I = 0, E = getNumberOfPluginDevices(); I < E; ++I) {
auto DeviceOrErr = PM->getDevice(DeviceOffset + I, /*WithoutInit*/ true);
if (!DeviceOrErr)
FATAL_MESSAGE(DeviceOffset + I, "%s",
toString(DeviceOrErr.takeError()).c_str());
Expand Down Expand Up @@ -142,21 +148,28 @@ void PluginAdaptorTy::initDevices(PluginManager &PM) {
if (set_device_offset)
set_device_offset(DeviceOffset);

// Perform all the actions we normally perform even under lazy initialization.
// The only exception is that we postpone initializing the device itself until
// it is being used.
int32_t NumPD = getNumberOfPluginDevices();
ExclusiveDevicesAccessor->reserve(DeviceOffset + NumPD);
for (int32_t PDevI = 0, UserDevId = DeviceOffset; PDevI < NumPD; PDevI++) {
auto Device = std::make_unique<DeviceTy>(this, UserDevId, PDevI);
if (auto Err = Device->init()) {
DP("Skip plugin known device %d: %s\n", PDevI,
toString(std::move(Err)).c_str());
continue;
if (!LazyDeviceInitialization) {
if (auto Err = Device->init()) {
DP("Skip plugin known device %d: %s\n", PDevI,
toString(std::move(Err)).c_str());
continue;
}
++NumberOfUserDevices;
}

ExclusiveDevicesAccessor->push_back(std::move(Device));
++NumberOfUserDevices;
++UserDevId;
}

// Note: when the devices are lazily initialized, the number of exposed
// devices below is zero.
DP("Plugin adaptor " DPxMOD " has index %d, exposes %d out of %d devices!\n",
DPxPTR(LibraryHandler.get()), DeviceOffset, NumberOfUserDevices,
NumberOfPluginDevices);
Expand All @@ -178,15 +191,15 @@ static void registerImageIntoTranslationTable(TranslationTable &TT,
// Resize the Targets Table and Images to accommodate the new targets if
// required
unsigned TargetsTableMinimumSize =
RTL.DeviceOffset + RTL.getNumberOfUserDevices();
RTL.DeviceOffset + RTL.getNumberOfPluginDevices();

if (TT.TargetsTable.size() < TargetsTableMinimumSize) {
TT.TargetsImages.resize(TargetsTableMinimumSize, 0);
TT.TargetsTable.resize(TargetsTableMinimumSize, 0);
}

// Register the image in all devices for this target type.
for (int32_t I = 0; I < RTL.getNumberOfUserDevices(); ++I) {
// Register the image in all possible devices for this target type.
for (int32_t I = 0; I < RTL.getNumberOfPluginDevices(); ++I) {
// If we are changing the image we are also invalidating the target table.
if (TT.TargetsImages[RTL.DeviceOffset + I] != Image) {
TT.TargetsImages[RTL.DeviceOffset + I] = Image;
Expand Down Expand Up @@ -291,8 +304,9 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {

// Execute dtors for static objects if the device has been used, i.e.
// if its PendingCtors list has been emptied.
for (int32_t I = 0; I < FoundRTL->getNumberOfUserDevices(); ++I) {
auto DeviceOrErr = PM->getDevice(FoundRTL->DeviceOffset + I);
for (int32_t I = 0; I < FoundRTL->getNumberOfPluginDevices(); ++I) {
auto DeviceOrErr =
PM->getDevice(FoundRTL->DeviceOffset + I, /*WithoutInit*/ true);
if (!DeviceOrErr)
FATAL_MESSAGE(FoundRTL->DeviceOffset + I, "%s",
toString(DeviceOrErr.takeError()).c_str());
Expand Down Expand Up @@ -357,13 +371,31 @@ void PluginManager::unregisterLib(__tgt_bin_desc *Desc) {
DP("Done unregistering library!\n");
}

Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo) {
Expected<DeviceTy &> PluginManager::getDevice(uint32_t DeviceNo,
bool WithoutInit) {
auto ExclusiveDevicesAccessor = getExclusiveDevicesAccessor();
if (DeviceNo >= ExclusiveDevicesAccessor->size())
return createStringError(
inconvertibleErrorCode(),
"Device number '%i' out of range, only %i devices available", DeviceNo,
ExclusiveDevicesAccessor->size());

return *(*ExclusiveDevicesAccessor)[DeviceNo];
DeviceTy &Device = *(*ExclusiveDevicesAccessor)[DeviceNo];

// If the device is initialized eagerly then IsInit will be true already and
// the whole initialization of the device will be skipped. In some cases, such
// as when we register the offload entries, we also want to make sure that the
// device is fetched without the initialization being even considered. For
// this we set WithoutInit to true.
if (!WithoutInit && !Device.IsInit) {
if (auto Err = Device.init()) {
DP("Failed to init device %d: %s\n", DeviceNo,
toString(std::move(Err)).c_str());
return createStringError(inconvertibleErrorCode(),
"Failed to init device %d\n", DeviceNo);
}
DP("Device %d (local ID %d) has been lazily initialized! (IsInit = %d)\n",
DeviceNo, Device.RTLDeviceID, Device.IsInit);
}
return Device;
}
7 changes: 6 additions & 1 deletion openmp/libomptarget/src/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
}

DeviceTy::DeviceTy(PluginAdaptorTy *RTL, int32_t DeviceID, int32_t RTLDeviceID)
: DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID),
: DeviceID(DeviceID), RTL(RTL), RTLDeviceID(RTLDeviceID), IsInit(false),
PendingCtorsDtors(), PendingGlobalsMtx(), MappingInfo(*this) {}

DeviceTy::~DeviceTy() {
Expand All @@ -77,6 +77,10 @@ DeviceTy::~DeviceTy() {
}

llvm::Error DeviceTy::init() {
// If device is already initialized then return success:
if (IsInit)
return llvm::Error::success();

// Make call to init_requires if it exists for this plugin.
int32_t Ret = 0;
if (RTL->init_requires)
Expand All @@ -103,6 +107,7 @@ llvm::Error DeviceTy::init() {
OMPX_ReplaySaveOutput, ReqPtrArgOffset);
}

IsInit = true;
return llvm::Error::success();
}

Expand Down
34 changes: 34 additions & 0 deletions openmp/libomptarget/test/offloading/lazy_device_init.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// clang-format off
// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_LAZY_DEVICE_INIT=1 LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 | %fcheck-generic
// clang-format on

// REQUIRES: libomptarget-debug

// UNSUPPORTED: nvptx64-nvidia-cuda
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO

#include <stdio.h>
#include <stdlib.h>

int main() {
int *a = (int *)malloc(sizeof(int) * 10);

// clang-format off
// CHECK: omptarget --> Using lazy device initialization!
// CHECK: omptarget --> Plugin adaptor {{.*}} has index 0, exposes 0 out of {{.*}} devices!
// CHECK: omptarget --> Done registering entries!
// CHECK: omptarget --> Use default device id [[DEVICE_ID:.*]]
// CHECK: omptarget --> Device [[DEVICE_ID]] (local ID 0) has been lazily initialized! (IsInit = 1)
// clang-format on

#pragma omp target map(from : a[ : 10])
{ a[5] = 4; }

// CHECK: a[5] = 4

printf("a[5] = %d\n", a[5]);

free(a);

return 0;
}