Skip to content

[MLIR][NVGPUToNVVM] Remove typed pointer support #70867

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 2, 2023

Conversation

Dinistro
Copy link
Contributor

This commit removes the support for lowering NVGPU to NVVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect.

Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502

This commit removes the support for lowering NVGPU to NVVM dialect with
typed pointers. Typed pointers have been deprecated for a while now and
it's planned to soon remove them from the LLVM dialect.

Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502
@llvmbot
Copy link
Member

llvmbot commented Oct 31, 2023

@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Christian Ulmann (Dinistro)

Changes

This commit removes the support for lowering NVGPU to NVVM dialect with typed pointers. Typed pointers have been deprecated for a while now and it's planned to soon remove them from the LLVM dialect.

Related PSA: https://discourse.llvm.org/t/psa-removal-of-typed-pointers-from-the-llvm-dialect/74502


Full diff: https://github.com/llvm/llvm-project/pull/70867.diff

4 Files Affected:

  • (modified) mlir/include/mlir/Conversion/Passes.td (-5)
  • (modified) mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp (+4-15)
  • (modified) mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir (+1-1)
  • (removed) mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir (-59)
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 036c9b0039779ab..fb344ebd880e04d 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -823,11 +823,6 @@ def ConvertNVGPUToNVVMPass : Pass<"convert-nvgpu-to-nvvm"> {
   let dependentDialects = [
     "NVVM::NVVMDialect",
   ];
-  let options = [
-    Option<"useOpaquePointers", "use-opaque-pointers", "bool",
-              /*default=*/"true", "Generate LLVM IR using opaque pointers "
-              "instead of typed pointers">
-  ];
 }
 
 
diff --git a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
index efcde2ba58bd685..1977a571130ed12 100644
--- a/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
+++ b/mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
@@ -402,7 +402,6 @@ struct ConvertNVGPUToNVVMPass
 
   void runOnOperation() override {
     LowerToLLVMOptions options(&getContext());
-    options.useOpaquePointers = useOpaquePointers;
     RewritePatternSet patterns(&getContext());
     LLVMTypeConverter converter(&getContext(), options);
     IRRewriter rewriter(&getContext());
@@ -451,7 +450,7 @@ struct ConvertNVGPUToNVVMPass
           nvgpu::getMBarrierMemrefType(rewriter.getContext(), type));
     });
     converter.addConversion([&](nvgpu::TensorMapDescriptorType type) -> Type {
-      return converter.getPointerType(type.getTensor().getElementType());
+      return LLVM::LLVMPointerType::get(type.getContext());
     });
     populateNVGPUToNVVMConversionPatterns(converter, patterns);
     LLVMConversionTarget target(getContext());
@@ -651,16 +650,11 @@ struct NVGPUAsyncCopyLowering
     Value dstPtr =
         getStridedElementPtr(b.getLoc(), dstMemrefType, adaptor.getDst(),
                              adaptor.getDstIndices(), rewriter);
-    auto i8Ty = IntegerType::get(op.getContext(), 8);
     FailureOr<unsigned> dstAddressSpace =
         getTypeConverter()->getMemRefAddressSpace(dstMemrefType);
     if (failed(dstAddressSpace))
       return rewriter.notifyMatchFailure(
           loc, "destination memref address space not convertible to integer");
-    auto dstPointerType =
-        getTypeConverter()->getPointerType(i8Ty, *dstAddressSpace);
-    if (!getTypeConverter()->useOpaquePointers())
-      dstPtr = b.create<LLVM::BitcastOp>(dstPointerType, dstPtr);
 
     auto srcMemrefType = cast<MemRefType>(op.getSrc().getType());
     FailureOr<unsigned> srcAddressSpace =
@@ -671,13 +665,9 @@ struct NVGPUAsyncCopyLowering
 
     Value scrPtr = getStridedElementPtr(loc, srcMemrefType, adaptor.getSrc(),
                                         adaptor.getSrcIndices(), rewriter);
-    auto srcPointerType =
-        getTypeConverter()->getPointerType(i8Ty, *srcAddressSpace);
-    if (!getTypeConverter()->useOpaquePointers())
-      scrPtr = b.create<LLVM::BitcastOp>(srcPointerType, scrPtr);
     // Intrinsics takes a global pointer so we need an address space cast.
-    auto srcPointerGlobalType = getTypeConverter()->getPointerType(
-        i8Ty, NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+    auto srcPointerGlobalType = LLVM::LLVMPointerType::get(
+        op->getContext(), NVVM::NVVMMemorySpace::kGlobalMemorySpace);
     scrPtr = b.create<LLVM::AddrSpaceCastOp>(srcPointerGlobalType, scrPtr);
     int64_t dstElements = adaptor.getDstElements().getZExtValue();
     int64_t sizeInBytes =
@@ -1128,8 +1118,7 @@ struct NVGPUTmaCreateDescriptorOpLowering
   matchAndRewrite(nvgpu::TmaCreateDescriptorOp op, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
     ImplicitLocOpBuilder b(op->getLoc(), rewriter);
-    LLVM::LLVMPointerType llvmPointerType = getTypeConverter()->getPointerType(
-        IntegerType::get(op->getContext(), 8));
+    auto llvmPointerType = LLVM::LLVMPointerType::get(op->getContext());
     Type llvmInt64Type = IntegerType::get(op->getContext(), 64);
 
     Value tensorElementType =
diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
index 123a661193c4901..745cbdbd5153251 100644
--- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-nvgpu-to-nvvm='use-opaque-pointers=1' | FileCheck %s
+// RUN: mlir-opt %s -convert-nvgpu-to-nvvm | FileCheck %s
 // RUN: mlir-opt %s -transform-interpreter | FileCheck %s
 
 // CHECK-LABEL: @m16n8k16_fp16
diff --git a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir b/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
deleted file mode 100644
index 1a37f1c046cf66d..000000000000000
--- a/mlir/test/Conversion/NVGPUToNVVM/typed-pointers.mlir
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: mlir-opt --convert-nvgpu-to-nvvm='use-opaque-pointers=0' --split-input-file %s | FileCheck %s
-
-// CHECK-LABEL: @async_cp(
-// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
-func.func @async_cp(
-  %src: memref<128x128xf32>, %dst: memref<3x16x128xf32, 3>, %i : index) {
-  // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
-  // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32, 3>, ptr<f32, 3>, i64, array<3 x i64>, array<3 x i64>)>
-  // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(2048 : index) : i64
-  // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
-  // CHECK-DAG: %[[S1:.*]] = llvm.mlir.constant(128 : index) : i64
-  // CHECK-DAG: %[[FI0:.*]] = llvm.mul %[[IDX1]], %[[S1]] : i64
-  // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[FI0]] : i64
-  // CHECK-DAG: %[[FI2:.*]] = llvm.add %[[FI1]], %[[IDX1]] : i64
-  // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI2]]] : (!llvm.ptr<f32, 3>, i64) -> !llvm.ptr<f32, 3>
-  // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<f32, 3> to !llvm.ptr<i8, 3>
-  // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<f32>, ptr<f32>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK-DAG: %[[S3:.*]] = llvm.mlir.constant(128 : index) : i64
-  // CHECK-DAG: %[[FI3:.*]] = llvm.mul %[[IDX1]], %[[S3]]  : i64
-  // CHECK-DAG: %[[FI4:.*]] = llvm.add %[[FI3]], %[[IDX1]]  : i64
-  // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI4]]] : (!llvm.ptr<f32>, i64) -> !llvm.ptr<f32>
-  // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<f32> to !llvm.ptr<i8>
-  // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
-  // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca
-  %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 : memref<128x128xf32> to memref<3x16x128xf32, 3>
-  // CHECK: nvvm.cp.async.commit.group
-  %1 = nvgpu.device_async_create_group %0
-  // CHECK: nvvm.cp.async.wait.group 1
-  nvgpu.device_async_wait %1 { numGroups = 1 : i32 }
-
-  // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16, cache = cg
-  %2 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i, %i], 4 {bypassL1}: memref<128x128xf32> to memref<3x16x128xf32, 3>
-  return
-}
-
-// -----
-
-// CHECK-LABEL: @async_cp_i4(
-// CHECK-SAME: %[[IDX:[a-zA-Z0-9_]+]]: index)
-func.func @async_cp_i4(
-  %src: memref<128x64xi4>, %dst: memref<128x128xi4, 3>, %i : index) -> !nvgpu.device.async.token {
-  // CHECK: %[[IDX1:.*]] = builtin.unrealized_conversion_cast %[[IDX]] : index to i64
-  // CHECK-DAG: %[[BASEDST:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4, 3>, ptr<i4, 3>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK-DAG: %[[S0:.*]] = llvm.mlir.constant(128 : index) : i64
-  // CHECK-DAG: %[[LI:.*]] = llvm.mul %[[IDX1]], %[[S0]] : i64
-  // CHECK-DAG: %[[FI1:.*]] = llvm.add %[[LI]], %[[IDX1]] : i64
-  // CHECK-DAG: %[[ADDRESSDST:.*]] = llvm.getelementptr %[[BASEDST]][%[[FI1]]] : (!llvm.ptr<i4, 3>, i64) -> !llvm.ptr<i4, 3>
-  // CHECK-DAG: %[[CAST0:.*]] = llvm.bitcast %[[ADDRESSDST]] : !llvm.ptr<i4, 3> to !llvm.ptr<i8, 3>
-  // CHECK-DAG: %[[BASESRC:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.struct<(ptr<i4>, ptr<i4>, i64, array<2 x i64>, array<2 x i64>)>
-  // CHECK-DAG: %[[S2:.*]] = llvm.mlir.constant(64 : index) : i64
-  // CHECK-DAG: %[[FI2:.*]] = llvm.mul %[[IDX1]], %[[S2]]  : i64
-  // CHECK-DAG: %[[FI3:.*]] = llvm.add %[[FI2]], %[[IDX1]]  : i64
-  // CHECK-DAG: %[[ADDRESSSRC:.*]] = llvm.getelementptr %[[BASESRC]][%[[FI3]]] : (!llvm.ptr<i4>, i64) -> !llvm.ptr<i4>
-  // CHECK-DAG: %[[CAST1:.*]] = llvm.bitcast %[[ADDRESSSRC]] : !llvm.ptr<i4> to !llvm.ptr<i8>
-  // CHECK-DAG: %[[CAST2:.*]] = llvm.addrspacecast %[[CAST1]] : !llvm.ptr<i8> to !llvm.ptr<i8, 1>
-  // CHECK-DAG: nvvm.cp.async.shared.global %[[CAST0]], %[[CAST2]], 16, cache = ca
-  %0 = nvgpu.device_async_copy %src[%i, %i], %dst[%i, %i], 32 : memref<128x64xi4> to memref<128x128xi4, 3>
-  return %0 : !nvgpu.device.async.token
-}

Copy link
Member

@zero9178 zero9178 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@Dinistro Dinistro merged commit 2f17c9f into llvm:main Nov 2, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants