Commit b7f72cf

d0khamphet authored and committed
[mlir][GPU] block_id has the grid size as its range
1 parent 33e7d5b commit b7f72cf
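
The lowering for gpu.block_id previously passed IndexKind::Block, which derives the value's range from the block (workgroup) size; block_id actually ranges over the number of blocks in the grid, so the lowering now passes IndexKind::Grid and picks the range up from the kernel's known_grid_size. A minimal sketch of the effect, mirroring the test added below (the kernel name and the memref argument are illustrative, not part of the commit):

    gpu.module @example {
      // Hypothetical kernel; the launch grid is known to be 32 x 4 x 2 blocks.
      gpu.func @kernel(%out: memref<index>) kernel
          attributes {known_grid_size = array<i32: 32, 4, 2>} {
        // block_id x lies in [0, 32), so the NVVM lowering can now emit
        //   %0 = nvvm.read.ptx.sreg.ctaid.x range <i32, 0, 32> : i32
        %bid = gpu.block_id x
        memref.store %bid, %out[] : memref<index>
        gpu.return
      }
    }

Run through the GPU-to-NVVM conversion, as the updated gpu-to-nvvm.mlir test does, the block_id ops for x, y, and z pick up range annotations of <i32, 0, 32>, <i32, 0, 4>, and <i32, 0, 2> respectively.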

File tree: 2 files changed (+20 lines, −1 line)


mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 1 addition & 1 deletion
@@ -377,7 +377,7 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
       NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim);
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
-      converter, IndexKind::Block, IntrType::Id);
+      converter, IndexKind::Grid, IntrType::Id);
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::GridDimOp, NVVM::GridDimXOp, NVVM::GridDimYOp, NVVM::GridDimZOp>>(
       converter, IndexKind::Grid, IntrType::Dim);

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 19 additions & 0 deletions
@@ -943,6 +943,25 @@ gpu.module @test_module_49 {
   }
 }

+gpu.module @test_module_50 {
+// CHECK-LABEL: func @kernel_with_grid_size(
+  gpu.func @kernel_with_grid_size(%arg0: !llvm.ptr) kernel attributes {known_grid_size = array<i32: 32, 4, 2>} {
+    // CHECK: = nvvm.read.ptx.sreg.ctaid.x range <i32, 0, 32> : i32
+    %0 = gpu.block_id x
+    // CHECK: = nvvm.read.ptx.sreg.ctaid.y range <i32, 0, 4> : i32
+    %1 = gpu.block_id y
+    // CHECK: = nvvm.read.ptx.sreg.ctaid.z range <i32, 0, 2> : i32
+    %2 = gpu.block_id z
+
+    // Fake usage to prevent dead code elimination
+    %3 = arith.addi %0, %1 : index
+    %4 = arith.addi %3, %2 : index
+    %5 = arith.index_cast %4 : index to i64
+    llvm.store %5, %arg0 : i64, !llvm.ptr
+    gpu.return
+  }
+}
+
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%toplevel_module: !transform.any_op {transform.readonly}) {
     %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module
