Skip to content

Commit 5bcf162

Browse files
nbpatel
authored and legrosbuffle committed
[MLIR] Pass hostShared flag in gpu.alloc op to runtime wrappers (llvm#66401)
This PR is a breakdown of the big PR llvm#65539, which enables Intel GPU integration. In this PR we pass the hostShared flag to the runtime wrappers (required by SyclRuntimeWrappers, which will come in a subsequent PR) to indicate whether the allocation is done on host-shared GPU memory or device-only memory.
1 parent 0576c26 commit 5bcf162

File tree

5 files changed

+25
-11
lines changed

5 files changed

+25
-11
lines changed

mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
168168
"mgpuMemAlloc",
169169
llvmPointerType /* void * */,
170170
{llvmIntPtrType /* intptr_t sizeBytes */,
171-
llvmPointerType /* void *stream */}};
171+
llvmPointerType /* void *stream */,
172+
llvmInt8Type /* bool isHostShared */}};
172173
FunctionCallBuilder deallocCallBuilder = {
173174
"mgpuMemFree",
174175
llvmVoidType,
@@ -787,19 +788,23 @@ LogicalResult ConvertHostUnregisterOpToGpuRuntimeCallPattern::matchAndRewrite(
787788
LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
788789
gpu::AllocOp allocOp, OpAdaptor adaptor,
789790
ConversionPatternRewriter &rewriter) const {
790-
if (adaptor.getHostShared())
791-
return rewriter.notifyMatchFailure(
792-
allocOp, "host_shared allocation is not supported");
793791

794792
MemRefType memRefType = allocOp.getType();
795793

796794
if (failed(areAllLLVMTypes(allocOp, adaptor.getOperands(), rewriter)) ||
797-
!isConvertibleAndHasIdentityMaps(memRefType) ||
798-
failed(isAsyncWithOneDependency(rewriter, allocOp)))
795+
!isConvertibleAndHasIdentityMaps(memRefType))
799796
return failure();
800797

801798
auto loc = allocOp.getLoc();
802799

800+
bool isShared = allocOp.getHostShared();
801+
802+
if (isShared && allocOp.getAsyncToken())
803+
return rewriter.notifyMatchFailure(
804+
allocOp, "Host Shared allocation cannot be done async");
805+
else if (!isShared && failed(isAsyncWithOneDependency(rewriter, allocOp)))
806+
return failure();
807+
803808
// Get shape of the memref as values: static sizes are constant
804809
// values and dynamic sizes are passed to 'alloc' as operands.
805810
SmallVector<Value, 4> shape;
@@ -812,8 +817,13 @@ LogicalResult ConvertAllocOpToGpuRuntimeCallPattern::matchAndRewrite(
812817
// descriptor.
813818
Type elementPtrType = this->getElementPtrType(memRefType);
814819
auto stream = adaptor.getAsyncDependencies().front();
820+
821+
auto isHostShared = rewriter.create<mlir::LLVM::ConstantOp>(
822+
loc, llvmInt8Type, rewriter.getI8IntegerAttr(isShared));
823+
815824
Value allocatedPtr =
816-
allocCallBuilder.create(loc, rewriter, {sizeBytes, stream}).getResult();
825+
allocCallBuilder.create(loc, rewriter, {sizeBytes, stream, isHostShared})
826+
.getResult();
817827
if (!getTypeConverter()->useOpaquePointers())
818828
allocatedPtr =
819829
rewriter.create<LLVM::BitcastOp>(loc, elementPtrType, allocatedPtr);

mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,8 @@ extern MLIR_CUDA_WRAPPERS_EXPORT "C" void mgpuEventRecord(CUevent event,
232232
CUDA_REPORT_IF_ERROR(cuEventRecord(event, stream));
233233
}
234234

235-
extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream /*stream*/) {
235+
extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream /*stream*/,
236+
bool /*isHostShared*/) {
236237
ScopedContext scopedContext;
237238
CUdeviceptr ptr = 0;
238239
if (sizeBytes != 0)

mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ extern "C" void mgpuEventRecord(hipEvent_t event, hipStream_t stream) {
104104
HIP_REPORT_IF_ERROR(hipEventRecord(event, stream));
105105
}
106106

107-
extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, hipStream_t /*stream*/) {
107+
extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, hipStream_t /*stream*/,
108+
bool /*isHostShared*/) {
108109
void *ptr;
109110
HIP_REPORT_IF_ERROR(hipMalloc(&ptr, sizeBytes));
110111
return ptr;

mlir/test/Conversion/GPUCommon/lower-alloc-to-gpu-runtime-calls.mlir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ module attributes {gpu.container_module} {
88
%0 = gpu.wait async
99
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
1010
// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
11-
// CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]])
11+
// CHECK: %[[isHostShared:.*]] = llvm.mlir.constant
12+
// CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]], %[[isHostShared]])
1213
%1, %2 = gpu.alloc async [%0] (%size) : memref<?xf32>
1314
// CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0]
1415
// CHECK: llvm.call @mgpuMemFree(%[[float_ptr]], %[[stream]])

mlir/test/Conversion/GPUCommon/typed-pointers.mlir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ module attributes {gpu.container_module} {
88
%0 = gpu.wait async
99
// CHECK: %[[gep:.*]] = llvm.getelementptr {{.*}}[%[[size]]]
1010
// CHECK: %[[size_bytes:.*]] = llvm.ptrtoint %[[gep]]
11-
// CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]])
11+
// CHECK: %[[isHostShared:.*]] = llvm.mlir.constant
12+
// CHECK: llvm.call @mgpuMemAlloc(%[[size_bytes]], %[[stream]], %[[isHostShared]])
1213
%1, %2 = gpu.alloc async [%0] (%size) : memref<?xf32>
1314
// CHECK: %[[float_ptr:.*]] = llvm.extractvalue {{.*}}[0]
1415
// CHECK: %[[void_ptr:.*]] = llvm.bitcast %[[float_ptr]]

0 commit comments

Comments (0)