-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[MLIR][NVGPUToNVVM] Remove typed pointer support #70867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This commit removes the support for lowering NVGPU to NVVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect. Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
@llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: Christian Ulmann (Dinistro) ChangesThis commit removes the support for lowering NVGPU to NVVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect. Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502 Full diff: https://github.com/llvm/llvm-project/pull/70867.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 036c9b0039779ab..fb344ebd880e04d 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -823,11 +823,6 @@ def ConvertNVGPUToNVVMPass : Pass<"convert-nvgpu-to-nvvm"> {
let dependentDialects = [
"NVVM::NVVMDialect",
];
- let options = [
- Option<"useOpaquePointers", "use-opaque-pointers", "bool",
- /*default=*/"true", "Generate LLVM IR using opaque pointers "
- "instead of typed pointers">
- ];
}
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index efcde2ba58bd685..1977a571130ed12 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -402,7 +402,6 @@ struct ConvertNVGPUToNVVMPass
void runOnOperation() override {
LowerToLLVMOptions options(&getContext());
- options.useOpaquePointers = useOpaquePointers;
RewritePatternSet patterns(&getContext());
LLVMTypeConverter converter(&getContext(), options);
IRRewriter rewriter(&getContext());
@@ -451,7 +450,7 @@ struct ConvertNVGPUToNVVMPass
nvgpu::getMBarrierMemrefType(rewriter.getContext(), type));
});
converter.addConversion([&](nvgpu::TensorMapDescriptorType type) -> Type {
- return converter.getPointerType(type.getTensor().getElementType());
+ return LLVM::LLVMPointerType::get(type.getContext());
});
populateNVGPUToNVVMConversionPatterns(converter, patterns);
LLVMConversionTarget target(getContext());
@@ -651,16 +650,11 @@ struct NVGPUAsyncCopyLowering
Value dstPtr =
getStridedElementPtr(b.getLoc(), dstMemrefType, adaptor.getDst(),
adaptor.getDstIndices(), rewriter);
- auto i8Ty = IntegerType::get(op.getContext(), 8);
FailureOr<unsigned> dstAddressSpace =
getTypeConverter()->getMemRefAddressSpace(dstMemrefType);
if (failed(dstAddressSpace))
return rewriter.notifyMatchFailure(
loc, "destination memref address space not convertible to integer");
- auto dstPointerType =
- getTypeConverter()->getPointerType(i8Ty, *dstAddressSpace);
- if (!getTypeConverter()->useOpaquePointers())
- dstPtr = b.create<LLVM::BitcastOp>(dstPointerType, dstPtr);
auto srcMemrefType = cast<MemRefType>(op.getSrc().getType());
FailureOr<unsigned> srcAddressSpace =
@@ -671,13 +665,9 @@ struct NVGPUAsyncCopyLowering
Value scrPtr = getStridedElementPtr(loc, srcMemrefType, adaptor.getSrc(),
adaptor.getSrcIndices(), rewriter);
- auto srcPointerType =
- getTypeConverter()->getPointerType(i8Ty, *srcAddressSpace);
- if (!getTypeConverter()->useOpaquePointers())
- scrPtr = b.create<LLVM::BitcastOp>(srcPointerType, scrPtr);
// Intrinsics takes a global pointer so we need an address space cast.
- auto srcPointerGlobalType = getTypeConverter()->getPointerType(
- i8Ty, NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+ auto srcPointerGlobalType = LLVM::LLVMPointerType::get(
+ op->getContext(), NVVM::NVVMMemorySpace::kGlobalMemorySpace);
scrPtr = b.create<LLVM::AddrSpaceCastOp>(srcPointerGlobalType, scrPtr);
int64_t dstElements = adaptor.getDstElements().getZExtValue();
int64_t sizeInBytes =
@@ -1128,8 +1118,7 @@ struct NVGPUTmaCreateDescriptorOpLowering
matchAndRewrite(nvgpu::TmaCreateDescriptorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
ImplicitLocOpBuilder b(op->getLoc(), rewriter);
- LLVM::LLVMPointerType llvmPointerType = getTypeConverter()->getPointerType(
- IntegerType::get(op->getContext(), 8));
+ auto llvmPointerType = LLVM::LLVMPointerType::get(op->getContext());
Type llvmInt64Type = IntegerType::get(op->getContext(), 64);
Value tensorElementType =
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 123a661193c4901..745cbdbd5153251 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-nvgpu-to-nvvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s -convert-nvgpu-to-nvvm | FileCheck %s
// RUN: mlir-opt %s -transform-interpreter | FileCheck %s
// CHECK-LABEL: @m16n8k16_fp16
diff --git a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir b/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
deleted file mode 100644
index 1a37f1c046cf66d..000000000000000
--- a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: mlir-opt --convert-nvgpu-to-nvvm='use-opaque-pointers=0' --split-input-file %s | FileCheck %s
-
-// CHECK-LABEL: @async_cp(
-// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
-func.func @async_cp(
- %src: memref<128x128xf32>, %dst: memref<3x16x128xf32, 3>, %i : index) {
- // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
- // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<3 x i64>, array<3 x i64>)>
- // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(2048 : index) : i64
- // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
- // CHECK-DAG: %[[S1:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[FI0:.*]] = llvm.mul %[[IDX1]], %[[S1]] : i64
- // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[FI0]] : i64
- // CHECK-DAG: %[[FI2:.*]] = llvm.add %[[FI1]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI2]]] : (!llvm.ptr<f32, 3>, i64) -> !llvm.ptr<f32, 3>
- // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<f32, 3> to !llvm.ptr<i8, 3>
- // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK-DAG: %[[S3:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[FI3:.*]] = llvm.mul %[[IDX1]], %[[S3]] : i64
- // CHECK-DAG: %[[FI4:.*]] = llvm.add %[[FI3]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI4]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
- // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<f32> to !llvm.ptr<i8>
- // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
- // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca
- %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 : memref<128x128xf32> to memref<3x16x128xf32, 3>
- // CHECK: nvvm.cp.async.commit.group
- %1 = nvgpu.device_async_create_group %0
- // CHECK: nvvm.cp.async.wait.group 1
- nvgpu.device_async_wait %1 { numGroups = 1 : i32 }
-
- // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg
- %2 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 {bypassL1}: memref<128x128xf32> to memref<3x16x128xf32, 3>
- return
-}
-
-// -----
-
-// CHECK-LABEL: @async_cp_i4(
-// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
-func.func @async_cp_i4(
- %src: memref<128x64xi4>, %dst: memref<128x128xi4, 3>, %i : index) -> !nvgpu.device.async.token {
- // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
- // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4, 3>, ptr<i4, 3>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64
- // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
- // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr<i4, 3>, i64) -> !llvm.ptr<i4, 3>
- // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<i4, 3> to !llvm.ptr<i8, 3>
- // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4>, ptr<i4>, i64, array<2 x i64>, array<2 x i64>)>
- // CHECK-DAG: %[[S2:.*]] = llvm.mlir.constant(64 : index) : i64
- // CHECK-DAG: %[[FI2:.*]] = llvm.mul %[[IDX1]], %[[S2]] : i64
- // CHECK-DAG: %[[FI3:.*]] = llvm.add %[[FI2]], %[[IDX1]] : i64
- // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI3]]] : (!llvm.ptr<i4>, i64) -> !llvm.ptr<i4>
- // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<i4> to !llvm.ptr<i8>
- // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
- // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca
- %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i], 32 : memref<128x64xi4> to memref<128x128xi4, 3>
- return %0 : !nvgpu.device.async.token
-}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This commit removes the support for lowering NVGPU to NVVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect.
Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502