Commit f8058a3

[mlir] Fix nvvm integration tests build error (#70113)
#69934 broke integration tests that rely on the kernel-bare-ptr-calling-convention and host-bare-ptr-calling-convention flags. This PR brings these flags back. It also removes the kernel-index-bitwidth flag, since the kernel pointer size depends on the host: separating a 64-bit host from a 32-bit kernel index is not viable.
1 parent c780352 commit f8058a3
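
For context, an integration test can now request the bare-pointer conventions directly in its RUN lines. The sketch below is illustrative only: it assumes the usual key=1 syntax for boolean pipeline options and reuses the sm_80 settings from the tests touched by this commit; it is not a line taken from the patch.

// RUN: mlir-opt %s \
// RUN: -test-lower-to-nvvm="kernel-bare-ptr-calling-convention=1 host-bare-ptr-calling-convention=1 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
// RUN: | mlir-cpu-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils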

File tree: 3 files changed (+22, -4 lines)

mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s \
 // RUN: -transform-interpreter \
 // RUN: -test-transform-dialect-erase-schedule \
-// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN: -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 // RUN: mlir-opt %s \
 // RUN: -transform-interpreter \
 // RUN: -test-transform-dialect-erase-schedule \
-// RUN: -test-lower-to-nvvm="kernel-index-bitwidth=32 cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
+// RUN: -test-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
 // RUN: | mlir-cpu-runner \
 // RUN: --shared-libs=%mlir_cuda_runtime \
 // RUN: --shared-libs=%mlir_runner_utils \

mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp

Lines changed: 20 additions & 2 deletions
@@ -65,6 +65,18 @@ struct TestLowerToNVVMOptions
       *this, "opt-level",
       llvm::cl::desc("Optimization level for NVVM compilation"),
       llvm::cl::init(2)};
+  PassOptions::Option<bool> kernelUseBarePtrCallConv{
+      *this, "kernel-bare-ptr-calling-convention",
+      llvm::cl::desc(
+          "Whether to use the bareptr calling convention on the kernel "
+          "(warning this should be false until the GPU layering is fixed)"),
+      llvm::cl::init(false)};
+  PassOptions::Option<bool> hostUseBarePtrCallConv{
+      *this, "host-bare-ptr-calling-convention",
+      llvm::cl::desc(
+          "Whether to use the bareptr calling convention on the host (warning "
+          "this should be false until the GPU layering is fixed)"),
+      llvm::cl::init(false)};
 };

 //===----------------------------------------------------------------------===//
@@ -105,7 +117,10 @@ void buildCommonPassPipeline(OpPassManager &pm,
 void buildGpuPassPipeline(OpPassManager &pm,
                           const TestLowerToNVVMOptions &options) {
   pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+  ConvertGpuOpsToNVVMOpsOptions opt;
+  opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
+  opt.indexBitwidth = options.indexBitWidth;
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps(opt));
   pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
   pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
   pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
@@ -116,7 +131,10 @@ void buildGpuPassPipeline(OpPassManager &pm,
 //===----------------------------------------------------------------------===//
 void buildHostPostPipeline(OpPassManager &pm,
                            const TestLowerToNVVMOptions &options) {
-  pm.addPass(createGpuToLLVMConversionPass());
+  GpuToLLVMConversionPassOptions opt;
+  opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
+  opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
+  pm.addPass(createGpuToLLVMConversionPass(opt));

   GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
   gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
