Skip to content

Commit 53602e6

Browse files
authored
[OpenMP][OMPT] Fix device identifier collision during callbacks (#65595)
Fixes: #65104. When a user assigns devices to target regions, different device identifiers may map onto the same id within different plugins. This makes callbacks much harder to read, because ambiguous identifiers are reported. We fix this by collecting the index offset upon general RTL initialization, which in turn allows the unique, user-observable device id to be calculated.
1 parent 87461d6 commit 53602e6

File tree

6 files changed

+43
-10
lines changed

6 files changed

+43
-10
lines changed

openmp/libomptarget/include/omptargetplugin.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,10 @@ int32_t __tgt_rtl_data_notify_mapped(int32_t ID, void *HstPtr, int64_t Size);
218218
// host address \p HstPtr and \p Size bytes.
219219
int32_t __tgt_rtl_data_notify_unmapped(int32_t ID, void *HstPtr);
220220

221+
// Set the global device identifier offset, such that the plugin may determine a
222+
// unique device number.
223+
int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset);
224+
221225
#ifdef __cplusplus
222226
}
223227
#endif

openmp/libomptarget/include/rtl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ struct RTLInfoTy {
7272
typedef int32_t(data_unlock_ty)(int32_t, void *);
7373
typedef int32_t(data_notify_mapped_ty)(int32_t, void *, int64_t);
7474
typedef int32_t(data_notify_unmapped_ty)(int32_t, void *);
75+
typedef int32_t(set_device_offset_ty)(int32_t);
7576
typedef int32_t(activate_record_replay_ty)(int32_t, uint64_t, bool, bool);
7677

7778
int32_t Idx = -1; // RTL index, index is the number of devices
@@ -125,6 +126,7 @@ struct RTLInfoTy {
125126
data_unlock_ty *data_unlock = nullptr;
126127
data_notify_mapped_ty *data_notify_mapped = nullptr;
127128
data_notify_unmapped_ty *data_notify_unmapped = nullptr;
129+
set_device_offset_ty *set_device_offset = nullptr;
128130
activate_record_replay_ty *activate_record_replay = nullptr;
129131

130132
// Are there images associated with this RTL.

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,8 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
542542
bool ExpectedStatus = false;
543543
if (OmptInitialized.compare_exchange_strong(ExpectedStatus, true))
544544
performOmptCallback(device_initialize,
545-
/* device_num */ DeviceId,
545+
/* device_num */ DeviceId +
546+
Plugin.getDeviceIdStartIndex(),
546547
/* type */ getComputeUnitKind().c_str(),
547548
/* device */ reinterpret_cast<ompt_device_t *>(this),
548549
/* lookup */ ompt::lookupCallbackByName,
@@ -587,7 +588,7 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
587588
return Plugin::success();
588589
}
589590

590-
Error GenericDeviceTy::deinit() {
591+
Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
591592
// Delete the memory manager before deinitializing the device. Otherwise,
592593
// we may delete device allocations after the device is deinitialized.
593594
if (MemoryManager)
@@ -605,7 +606,9 @@ Error GenericDeviceTy::deinit() {
605606
if (ompt::Initialized) {
606607
bool ExpectedStatus = true;
607608
if (OmptInitialized.compare_exchange_strong(ExpectedStatus, false))
608-
performOmptCallback(device_finalize, /* device_num */ DeviceId);
609+
performOmptCallback(device_finalize,
610+
/* device_num */ DeviceId +
611+
Plugin.getDeviceIdStartIndex());
609612
}
610613
#endif
611614

@@ -656,7 +659,8 @@ GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
656659
size_t Bytes =
657660
getPtrDiff(InputTgtImage->ImageEnd, InputTgtImage->ImageStart);
658661
performOmptCallback(device_load,
659-
/* device_num */ DeviceId,
662+
/* device_num */ DeviceId +
663+
Plugin.getDeviceIdStartIndex(),
660664
/* FileName */ nullptr,
661665
/* File Offset */ 0,
662666
/* VmaInFile */ nullptr,
@@ -1362,7 +1366,7 @@ Error GenericPluginTy::deinitDevice(int32_t DeviceId) {
13621366
return Plugin::success();
13631367

13641368
// Deinitialize the device and release its resources.
1365-
if (auto Err = Devices[DeviceId]->deinit())
1369+
if (auto Err = Devices[DeviceId]->deinit(*this))
13661370
return Err;
13671371

13681372
// Delete the device and invalidate its reference.
@@ -1815,6 +1819,12 @@ int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
18151819
return OFFLOAD_SUCCESS;
18161820
}
18171821

1822+
// Forwards DeviceIdOffset to setDeviceIdStartIndex() on the plugin object
// returned by Plugin::get(); the stored value is what getDeviceIdStartIndex()
// later adds to DeviceId in the OMPT device callbacks. Always returns
// OFFLOAD_SUCCESS.
int32_t __tgt_rtl_set_device_offset(int32_t DeviceIdOffset) {
1823+
Plugin::get().setDeviceIdStartIndex(DeviceIdOffset);
1824+
1825+
return OFFLOAD_SUCCESS;
1826+
}
1827+
18181828
#ifdef __cplusplus
18191829
}
18201830
#endif

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
611611
/// Deinitialize the device and free all its resources. After this call, the
612612
/// device is no longer considered ready, so no queries or modifications are
613613
/// allowed.
614-
Error deinit();
614+
Error deinit(GenericPluginTy &Plugin);
615615
virtual Error deinitImpl() = 0;
616616

617617
/// Load the binary image into the device and return the target table.
@@ -946,6 +946,12 @@ struct GenericPluginTy {
946946
/// Get the number of active devices.
947947
int32_t getNumDevices() const { return NumDevices; }
948948

949+
/// Get the plugin-specific device identifier offset.
950+
int32_t getDeviceIdStartIndex() const { return DeviceIdStartIndex; }
951+
952+
/// Set the plugin-specific device identifier offset.
953+
void setDeviceIdStartIndex(int32_t Offset) { DeviceIdStartIndex = Offset; }
954+
949955
/// Get the ELF code to recognize the binary image of this plugin.
950956
virtual uint16_t getMagicElfBits() const = 0;
951957

@@ -1010,6 +1016,11 @@ struct GenericPluginTy {
10101016
/// Number of devices available for the plugin.
10111017
int32_t NumDevices = 0;
10121018

1019+
/// Index offset, which when added to a DeviceId, will yield a unique
1020+
/// user-observable device identifier. This is especially important when
1021+
/// DeviceIds of multiple plugins / RTLs need to be distinguishable.
1022+
int32_t DeviceIdStartIndex = 0;
1023+
10131024
/// Array of pointers to the devices. Initially, they are all set to nullptr.
10141025
/// Once a device is initialized, the pointer is stored in the position given
10151026
/// by its device id. A position with nullptr means that the corresponding

openmp/libomptarget/src/device.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
583583
void *TargetPtr = nullptr;
584584
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
585585
RegionInterface.getCallbacks<ompt_target_data_alloc>(),
586-
RTLDeviceID, HstPtr, &TargetPtr, Size,
586+
DeviceID, HstPtr, &TargetPtr, Size,
587587
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
588588

589589
TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
@@ -594,7 +594,7 @@ int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
594594
/// RAII to establish tool anchors before and after data deletion
595595
OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII(
596596
RegionInterface.getCallbacks<ompt_target_data_delete>(),
597-
RTLDeviceID, TgtAllocBegin,
597+
DeviceID, TgtAllocBegin,
598598
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
599599

600600
return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
@@ -632,7 +632,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
632632
OMPT_IF_BUILT(
633633
InterfaceRAII TargetDataSubmitRAII(
634634
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
635-
RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
635+
DeviceID, TgtPtrBegin, HstPtrBegin, Size,
636636
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
637637

638638
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
@@ -660,7 +660,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
660660
OMPT_IF_BUILT(
661661
InterfaceRAII TargetDataRetrieveRAII(
662662
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
663-
RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
663+
DeviceID, HstPtrBegin, TgtPtrBegin, Size,
664664
/* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
665665

666666
if (!RTL->data_retrieve_async || !RTL->synchronize)

openmp/libomptarget/src/rtl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,8 @@ bool RTLsTy::attemptLoadRTL(const std::string &RTLName, RTLInfoTy &RTL) {
249249
DynLibrary->getAddressOfSymbol("__tgt_rtl_data_notify_mapped");
250250
*((void **)&RTL.data_notify_unmapped) =
251251
DynLibrary->getAddressOfSymbol("__tgt_rtl_data_notify_unmapped");
252+
*((void **)&RTL.set_device_offset) =
253+
DynLibrary->getAddressOfSymbol("__tgt_rtl_set_device_offset");
252254

253255
// Record Replay RTL
254256
*((void **)&RTL.activate_record_replay) =
@@ -424,6 +426,10 @@ void RTLsTy::initRTLonce(RTLInfoTy &R) {
424426
R.IsUsed = true;
425427
UsedRTLs.push_back(&R);
426428

429+
// If possible, set the device identifier offset
430+
if (R.set_device_offset)
431+
R.set_device_offset(Start);
432+
427433
DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler.get()), R.Idx);
428434
}
429435
}

0 commit comments

Comments
 (0)