@@ -2964,6 +2964,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
2964
2964
OMPX_DGPUMaps (" OMPX_DGPU_MAPS" , false ),
2965
2965
OMPX_SharedDescriptorMaxSize (" LIBOMPTARGET_SHARED_DESCRIPTOR_MAX_SIZE" ,
2966
2966
96 ),
2967
+ OMPX_EnableDevice2DeviceMemAccess (
2968
+ " OMPX_ENABLE_DEVICE_TO_DEVICE_MEM_ACCESS" , false ),
2967
2969
AMDGPUStreamManager (*this , Agent), AMDGPUEventManager(*this ),
2968
2970
AMDGPUSignalManager (*this ), Agent(Agent), HostDevice(HostDevice) {}
2969
2971
@@ -4557,6 +4559,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
4557
4559
// / memory. Default value is 96.
4558
4560
UInt32Envar OMPX_SharedDescriptorMaxSize;
4559
4561
4562
+ // Determines whether we call HSA API, upon device memory allocation,
4563
+ // for making the memory accessible from other agents.
4564
+ // Default is disabled
4565
+ BoolEnvar OMPX_EnableDevice2DeviceMemAccess;
4566
+
4560
4567
// / Stream manager for AMDGPU streams.
4561
4568
AMDGPUStreamManagerTy AMDGPUStreamManager;
4562
4569
@@ -5323,7 +5330,8 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
5323
5330
}
5324
5331
}
5325
5332
5326
- if (Alloc) {
5333
+ if (Alloc && (Kind == TARGET_ALLOC_HOST || Kind == TARGET_ALLOC_SHARED ||
5334
+ OMPX_EnableDevice2DeviceMemAccess)) {
5327
5335
// Get a list of agents that can access this memory pool. Inherently
5328
5336
// necessary for host or shared allocations. Also enabled for device memory
5329
5337
// to allow device to device memcpy when OMPX_EnableDevice2DeviceMemAccess is set.
0 commit comments