Commit b7f72cf

d0khamphet authored and committed
[mlir][GPU] block_id has the grid size as its range
1 parent 33e7d5b commit b7f72cf
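
The lowering for gpu.block_id previously passed IndexKind::Block, which derives the value's range from the block (workgroup) size; block_id actually ranges over the number of blocks in the grid, so the lowering now passes IndexKind::Grid and picks the range up from the kernel's known_grid_size. A minimal sketch of the effect, mirroring the test added below (the kernel name and the memref argument are illustrative, not part of the commit):

    gpu.module @example {
      // Hypothetical kernel; the launch grid is known to be 32 x 4 x 2 blocks.
      gpu.func @kernel(%out: memref<index>) kernel
          attributes {known_grid_size = array<i32: 32, 4, 2>} {
        // block_id x lies in [0, 32), so the NVVM lowering can now emit
        //   %0 = nvvm.read.ptx.sreg.ctaid.x range <i32, 0, 32> : i32
        %bid = gpu.block_id x
        memref.store %bid, %out[] : memref<index>
        gpu.return
      }
    }

Run through the GPU-to-NVVM conversion, as the updated gpu-to-nvvm.mlir test does, the block_id ops for x, y, and z pick up range annotations of <i32, 0, 32>, <i32, 0, 4>, and <i32, 0, 2> respectively.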

File tree: 2 files changed (+20 lines, −1 line)


mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 1 addition & 1 deletion
@@ -377,7 +377,7 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
       NVVM::ClusterDimZOp>>(converter, IndexKind::Other, IntrType::Dim);
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::BlockIdOp, NVVM::BlockIdXOp, NVVM::BlockIdYOp, NVVM::BlockIdZOp>>(
-      converter, IndexKind::Block, IntrType::Id);
+      converter, IndexKind::Grid, IntrType::Id);
   patterns.add<gpu::index_lowering::OpLowering<
       gpu::GridDimOp, NVVM::GridDimXOp, NVVM::GridDimYOp, NVVM::GridDimZOp>>(
       converter, IndexKind::Grid, IntrType::Dim);

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 19 additions & 0 deletions
@@ -943,6 +943,25 @@ gpu.module @test_module_49 {
   }
 }

+gpu.module @test_module_50 {
+// CHECK-LABEL: func @kernel_with_grid_size(
+  gpu.func @kernel_with_grid_size(%arg0: !llvm.ptr) kernel attributes {known_grid_size = array<i32: 32, 4, 2>} {
+    // CHECK: = nvvm.read.ptx.sreg.ctaid.x range <i32, 0, 32> : i32
+    %0 = gpu.block_id x
+    // CHECK: = nvvm.read.ptx.sreg.ctaid.y range <i32, 0, 4> : i32
+    %1 = gpu.block_id y
+    // CHECK: = nvvm.read.ptx.sreg.ctaid.z range <i32, 0, 2> : i32
+    %2 = gpu.block_id z
+
+    // Fake usage to prevent dead code elimination
+    %3 = arith.addi %0, %1 : index
+    %4 = arith.addi %3, %2 : index
+    %5 = arith.index_cast %4 : index to i64
+    llvm.store %5, %arg0 : i64, !llvm.ptr
+    gpu.return
+  }
+}
+
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%toplevel_module: !transform.any_op {transform.readonly}) {
     %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module
