Commit 74bf0b1

[mlir] Lower math dialect later in gpu-lower-to-nvvm-pipeline (#78556)
This PR moves the lowering of the math dialect later in the pipeline: for the GPU target, math ops are lowered correctly by `createConvertGpuOpsToNVVMOps`, so that pass needs to run first.
Parent: 4f32f5d
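To make the motivation concrete, here is a minimal illustrative sketch (hypothetical kernel and names, not taken from this commit): inside a GPU module, `math.exp` should be lowered by `createConvertGpuOpsToNVVMOps` to a libdevice call such as `__nv_expf`; if `createConvertMathToLLVMPass` ran first, the op would already have been rewritten into a generic LLVM intrinsic with no libdevice mapping.

    gpu.module @kernels {
      gpu.func @kernel(%x : f32) kernel {
        // Desired GPU-side lowering (createConvertGpuOpsToNVVMOps):
        //   %0 = llvm.call @__nv_expf(%x) : (f32) -> f32
        // Premature generic lowering (createConvertMathToLLVMPass):
        //   %0 = llvm.intr.exp(%x) : (f32) -> f32  // no libdevice call in the PTX
        %0 = math.exp %x : f32
        gpu.return
      }
    }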

File tree

2 files changed: +30, -1 lines


mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,6 @@ void buildCommonPassPipeline(
   pm.addPass(createConvertVectorToSCFPass());
   pm.addPass(createConvertSCFToCFPass());
   pm.addPass(createConvertNVVMToLLVMPass());
-  pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createConvertFuncToLLVMPass());
   pm.addPass(memref::createExpandStridedMetadataPass());

@@ -98,6 +97,7 @@ void buildHostPostPipeline(OpPassManager &pm,
   GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
   gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
   pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
+  pm.addPass(createConvertMathToLLVMPass());
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
   pm.addPass(createReconcileUnrealizedCastsPass());
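The net effect: `createConvertMathToLLVMPass` now runs in the host post-pipeline, after `createGpuModuleToBinaryPass` has serialized the GPU module, so it only affects host-side math ops; device-side math ops have already been lowered by `createConvertGpuOpsToNVVMOps`.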
New test file: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
+// RUN: -split-input-file | FileCheck %s
+
+// RUN: mlir-opt %s \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=isa" \
+// RUN: -split-input-file -debug-only=serialize-to-isa \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=CHECK-PTX
+
+// This test checks that the pipeline compiles the GPU region to PTX
+// correctly. It does not check the GPU-side IR, but it does check the
+// host-side IR and the generated PTX.
+
+// CHECK-LABEL: llvm.func @test_math(%arg0: f32) {
+func.func @test_math(%arg0 : f32) {
+  %c2 = arith.constant 2 : index
+  %c1 = arith.constant 1 : index
+  // CHECK: gpu.launch_func @test_math_kernel::@test_math_kernel
+  // CHECK: gpu.binary @test_math_kernel [#gpu.object<#nvvm.target
+  gpu.launch
+      blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1)
+      threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
+    // CHECK-PTX: __nv_expf
+    %s1 = math.exp %arg0 : f32
+    gpu.printf "%f" %s1 : f32
+    gpu.terminator
+  }
+  return
+}
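The `CHECK-PTX: __nv_expf` check is what exercises the reordering: because the math dialect is still intact when the GPU-side lowering runs, `math.exp` reaches the emitted PTX as a libdevice call rather than being rewritten earlier by `createConvertMathToLLVMPass`.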
