Skip to content

Commit ace69e6

Browse files
authored
[mlir][gpu] Improve gpu-lower-to-nvvm-pipeline Documentation (#77062)
This PR improves the documentation for the `gpu-lower-to-nvvm-pipeline` (as it was a remaining item for #75775) - Changes pipeline `gpu-lower-to-nvvm` -> `gpu-lower-to-nvvm-pipeline` - Adds a section to the GPU Dialect page on the website. It clarifies the pipeline's functionality in lowering primary dialects to NVVM targets.
1 parent 44b3cf4 commit ace69e6

31 files changed

+354
-93
lines changed

mlir/docs/Dialects/GPU.md

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,50 @@ mlir-translate example-nvvm.mlir \
6060
-o example.ll
6161
```
6262

63+
### Default NVVM Compilation Pipeline: gpu-lower-to-nvvm-pipeline
64+
65+
The `gpu-lower-to-nvvm-pipeline` compilation pipeline serves as the default way
66+
for NVVM target compilation within MLIR. This pipeline operates by lowering
67+
primary dialects (arith, memref, scf, vector, gpu, and nvgpu) to the NVVM target. It
68+
begins by lowering GPU code region(s) to the specified NVVM compilation target
69+
and subsequently handles the host code.
70+
71+
This pipeline specifically requires explicitly parallel IR and doesn't do GPU
72+
parallelization. To enable parallelism, necessary transformations must be
73+
applied before utilizing this pipeline.
74+
75+
It's designed to provide a generic solution for NVVM targets, generating NVVM
76+
and LLVM dialect code compatible with `mlir-cpu-runner` or the execution engine.
77+
78+
#### Example:
79+
80+
Here's a snippet illustrating the use of primary dialects, including arith,
81+
within GPU code execution:
82+
83+
```
84+
func.func @main() {
85+
%c2 = arith.constant 2 : index
86+
%c1 = arith.constant 1 : index
87+
gpu.launch
88+
blocks(%0, %1, %2) in (%3 = %c1, %4 = %c1, %5 = %c1)
89+
threads(%6, %7, %8) in (%9 = %c2, %10 = %c1, %11 = %c1) {
90+
gpu.printf "Hello from %d\n" %6 : index
91+
gpu.terminator
92+
}
93+
return
94+
}
95+
```
96+
97+
The `gpu-lower-to-nvvm-pipeline` compiles this input code to NVVM format as
98+
below. It provides customization options like specifying SM capability, PTX
99+
version, and optimization level. Once compiled, the resulting IR is ready for
100+
execution using `mlir-cpu-runner`. Alternatively, it can be translated into
101+
LLVM, expanding its utility within the system.
102+
103+
```
104+
mlir-opt example.mlir -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_90a cubin-features=+ptx80 opt-level=3"
105+
```
106+
63107
### Module serialization
64108
Attributes implementing the GPU Target Attribute Interface handle the
65109
serialization process and are called Target attributes. These attributes can be

mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,65 @@
99
#ifndef MLIR_DIALECT_GPU_PIPELINES_PASSES_H_
1010
#define MLIR_DIALECT_GPU_PIPELINES_PASSES_H_
1111

12+
#include "mlir/Pass/PassOptions.h"
13+
1214
namespace mlir {
1315
namespace gpu {
16+
17+
/// Options for the gpu to nvvm pipeline.
18+
struct GPUToNVVMPipelineOptions
19+
: public PassPipelineOptions<GPUToNVVMPipelineOptions> {
20+
PassOptions::Option<int64_t> indexBitWidth{
21+
*this, "index-bitwidth",
22+
llvm::cl::desc("Bitwidth of the index type for the host (warning this "
23+
"should be 64 until the GPU layering is fixed)"),
24+
llvm::cl::init(64)};
25+
PassOptions::Option<std::string> cubinTriple{
26+
*this, "cubin-triple",
27+
llvm::cl::desc("Triple to use to serialize to cubin."),
28+
llvm::cl::init("nvptx64-nvidia-cuda")};
29+
PassOptions::Option<std::string> cubinChip{
30+
*this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."),
31+
llvm::cl::init("sm_50")};
32+
PassOptions::Option<std::string> cubinFeatures{
33+
*this, "cubin-features",
34+
llvm::cl::desc("Features to use to serialize to cubin."),
35+
llvm::cl::init("+ptx60")};
36+
PassOptions::Option<std::string> cubinFormat{
37+
*this, "cubin-format",
38+
llvm::cl::desc("Compilation format to use to serialize to cubin."),
39+
llvm::cl::init("fatbin")};
40+
PassOptions::Option<int> optLevel{
41+
*this, "opt-level",
42+
llvm::cl::desc("Optimization level for NVVM compilation"),
43+
llvm::cl::init(2)};
44+
PassOptions::Option<bool> kernelUseBarePtrCallConv{
45+
*this, "kernel-bare-ptr-calling-convention",
46+
llvm::cl::desc(
47+
"Whether to use the bareptr calling convention on the kernel "
48+
"(warning this should be false until the GPU layering is fixed)"),
49+
llvm::cl::init(false)};
50+
PassOptions::Option<bool> hostUseBarePtrCallConv{
51+
*this, "host-bare-ptr-calling-convention",
52+
llvm::cl::desc(
53+
"Whether to use the bareptr calling convention on the host (warning "
54+
"this should be false until the GPU layering is fixed)"),
55+
llvm::cl::init(false)};
56+
};
57+
58+
//===----------------------------------------------------------------------===//
59+
// Building and Registering.
60+
//===----------------------------------------------------------------------===//
61+
62+
/// Adds the GPU to NVVM pipeline to the given pass manager. Transforms main
63+
/// dialects into NVVM targets. Begins with GPU code regions, then handles host
64+
/// code.
65+
void buildLowerToNVVMPassPipeline(OpPassManager &pm,
66+
const GPUToNVVMPipelineOptions &options);
67+
68+
/// Register all pipelines for the `gpu` dialect.
1469
void registerGPUToNVVMPipeline();
70+
1571
} // namespace gpu
1672
} // namespace mlir
1773

mlir/lib/Dialect/GPU/Pipelines/GPUToNVVMPipeline.cpp

Lines changed: 15 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -40,54 +40,14 @@ using namespace mlir;
4040

4141
#if MLIR_CUDA_CONVERSIONS_ENABLED
4242
namespace {
43-
struct GPUToNVVMPipelineOptions
44-
: public PassPipelineOptions<GPUToNVVMPipelineOptions> {
45-
PassOptions::Option<int64_t> indexBitWidth{
46-
*this, "index-bitwidth",
47-
llvm::cl::desc("Bitwidth of the index type for the host (warning this "
48-
"should be 64 until the GPU layering is fixed)"),
49-
llvm::cl::init(64)};
50-
PassOptions::Option<std::string> cubinTriple{
51-
*this, "cubin-triple",
52-
llvm::cl::desc("Triple to use to serialize to cubin."),
53-
llvm::cl::init("nvptx64-nvidia-cuda")};
54-
PassOptions::Option<std::string> cubinChip{
55-
*this, "cubin-chip", llvm::cl::desc("Chip to use to serialize to cubin."),
56-
llvm::cl::init("sm_50")};
57-
PassOptions::Option<std::string> cubinFeatures{
58-
*this, "cubin-features",
59-
llvm::cl::desc("Features to use to serialize to cubin."),
60-
llvm::cl::init("+ptx60")};
61-
PassOptions::Option<std::string> cubinFormat{
62-
*this, "cubin-format",
63-
llvm::cl::desc("Compilation format to use to serialize to cubin."),
64-
llvm::cl::init("fatbin")};
65-
PassOptions::Option<int> optLevel{
66-
*this, "opt-level",
67-
llvm::cl::desc("Optimization level for NVVM compilation"),
68-
llvm::cl::init(2)};
69-
PassOptions::Option<bool> kernelUseBarePtrCallConv{
70-
*this, "kernel-bare-ptr-calling-convention",
71-
llvm::cl::desc(
72-
"Whether to use the bareptr calling convention on the kernel "
73-
"(warning this should be false until the GPU layering is fixed)"),
74-
llvm::cl::init(false)};
75-
PassOptions::Option<bool> hostUseBarePtrCallConv{
76-
*this, "host-bare-ptr-calling-convention",
77-
llvm::cl::desc(
78-
"Whether to use the bareptr calling convention on the host (warning "
79-
"this should be false until the GPU layering is fixed)"),
80-
llvm::cl::init(false)};
81-
};
8243

8344
//===----------------------------------------------------------------------===//
8445
// Common pipeline
8546
//===----------------------------------------------------------------------===//
86-
void buildCommonPassPipeline(OpPassManager &pm,
87-
const GPUToNVVMPipelineOptions &options) {
47+
void buildCommonPassPipeline(
48+
OpPassManager &pm, const mlir::gpu::GPUToNVVMPipelineOptions &options) {
8849
pm.addPass(createConvertNVGPUToNVVMPass());
8950
pm.addPass(createGpuKernelOutliningPass());
90-
pm.addPass(createConvertLinalgToLoopsPass());
9151
pm.addPass(createConvertVectorToSCFPass());
9252
pm.addPass(createConvertSCFToCFPass());
9353
pm.addPass(createConvertNVVMToLLVMPass());
@@ -114,7 +74,7 @@ void buildCommonPassPipeline(OpPassManager &pm,
11474
// GPUModule-specific stuff.
11575
//===----------------------------------------------------------------------===//
11676
void buildGpuPassPipeline(OpPassManager &pm,
117-
const GPUToNVVMPipelineOptions &options) {
77+
const mlir::gpu::GPUToNVVMPipelineOptions &options) {
11878
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
11979
ConvertGpuOpsToNVVMOpsOptions opt;
12080
opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
@@ -129,7 +89,7 @@ void buildGpuPassPipeline(OpPassManager &pm,
12989
// Host Post-GPU pipeline
13090
//===----------------------------------------------------------------------===//
13191
void buildHostPostPipeline(OpPassManager &pm,
132-
const GPUToNVVMPipelineOptions &options) {
92+
const mlir::gpu::GPUToNVVMPipelineOptions &options) {
13393
GpuToLLVMConversionPassOptions opt;
13494
opt.hostBarePtrCallConv = options.hostUseBarePtrCallConv;
13595
opt.kernelBarePtrCallConv = options.kernelUseBarePtrCallConv;
@@ -143,36 +103,28 @@ void buildHostPostPipeline(OpPassManager &pm,
143103
pm.addPass(createReconcileUnrealizedCastsPass());
144104
}
145105

146-
void buildLowerToNVVMPassPipeline(OpPassManager &pm,
147-
const GPUToNVVMPipelineOptions &options) {
148-
//===----------------------------------------------------------------------===//
149-
// Common pipeline
150-
//===----------------------------------------------------------------------===//
106+
} // namespace
107+
108+
void mlir::gpu::buildLowerToNVVMPassPipeline(
109+
OpPassManager &pm, const GPUToNVVMPipelineOptions &options) {
110+
// Common pipelines
151111
buildCommonPassPipeline(pm, options);
152112

153-
//===----------------------------------------------------------------------===//
154-
// GPUModule-specific stuff.
155-
//===----------------------------------------------------------------------===//
113+
// GPUModule-specific stuff
156114
buildGpuPassPipeline(pm, options);
157115

158-
//===----------------------------------------------------------------------===//
159-
// Host post-GPUModule-specific stuff.
160-
//===----------------------------------------------------------------------===//
116+
// Host post-GPUModule-specific stuff
161117
buildHostPostPipeline(pm, options);
162118
}
163-
} // namespace
164119

165-
namespace mlir {
166-
namespace gpu {
167-
void registerGPUToNVVMPipeline() {
120+
void mlir::gpu::registerGPUToNVVMPipeline() {
168121
PassPipelineRegistration<GPUToNVVMPipelineOptions>(
169-
"gpu-lower-to-nvvm",
170-
"The default pipeline lowers main dialects (arith, linalg, memref, scf, "
122+
"gpu-lower-to-nvvm-pipeline",
123+
"The default pipeline lowers main dialects (arith, memref, scf, "
171124
"vector, gpu, and nvgpu) to NVVM. It starts by lowering GPU code to the "
172125
"specified compilation target (default is fatbin) then lowers the host "
173126
"code.",
174127
buildLowerToNVVMPassPipeline);
175128
}
176-
} // namespace gpu
177-
} // namespace mlir
129+
178130
#endif // MLIR_CUDA_CONVERSIONS_ENABLED

mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/dump-ptx.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm -debug-only=serialize-to-isa \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \
33
// RUN: 2>&1 | FileCheck %s
44

55
// CHECK: Generated by LLVM NVPTX Back-End

mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
// RUN: mlir-opt \
55
// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse)" \
66
// RUN: %s \
7-
// RUN: | mlir-opt --gpu-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx71 cubin-format=%gpu_compilation_format" \
7+
// RUN: | mlir-opt --gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 cubin-features=+ptx71 cubin-format=%gpu_compilation_format" \
88
// RUN: | mlir-cpu-runner \
99
// RUN: --shared-libs=%mlir_cuda_runtime \
1010
// RUN: --shared-libs=%mlir_c_runner_utils \

mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-reduction-distribute.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" -canonicalize |\
22
// RUN: mlir-opt -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if |\
33
// RUN: mlir-opt -lower-affine -convert-vector-to-scf -convert-scf-to-cf -convert-vector-to-llvm \
4-
// RUN: -convert-arith-to-llvm -gpu-lower-to-nvvm | \
4+
// RUN: -convert-arith-to-llvm -gpu-lower-to-nvvm-pipeline | \
55
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
66
// RUN: -shared-libs=%mlir_cuda_runtime \
77
// RUN: -shared-libs=%mlir_c_runner_utils \

mlir/test/Integration/Dialect/Vector/GPU/CUDA/test-warp-distribute.mlir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// everything on the same thread.
33
// RUN: mlir-opt %s -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
44
// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
5-
// RUN: -gpu-lower-to-nvvm | \
5+
// RUN: -gpu-lower-to-nvvm-pipeline | \
66
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
77
// RUN: -shared-libs=%mlir_cuda_runtime \
88
// RUN: -shared-libs=%mlir_c_runner_utils \
@@ -13,7 +13,7 @@
1313
// RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write" \
1414
// RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
1515
// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
16-
// RUN: -gpu-lower-to-nvvm | \
16+
// RUN: -gpu-lower-to-nvvm-pipeline | \
1717
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
1818
// RUN: -shared-libs=%mlir_cuda_runtime \
1919
// RUN: -shared-libs=%mlir_c_runner_utils \
@@ -23,7 +23,7 @@
2323
// RUN: mlir-opt %s -test-vector-warp-distribute="hoist-uniform distribute-transfer-write propagate-distribution" \
2424
// RUN: -test-vector-warp-distribute=rewrite-warp-ops-to-scf-if -canonicalize | \
2525
// RUN: mlir-opt -convert-vector-to-scf -convert-scf-to-cf -convert-cf-to-llvm -convert-vector-to-llvm -convert-arith-to-llvm \
26-
// RUN: -gpu-lower-to-nvvm | \
26+
// RUN: -gpu-lower-to-nvvm-pipeline | \
2727
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
2828
// RUN: -shared-libs=%mlir_cuda_runtime \
2929
// RUN: -shared-libs=%mlir_c_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f16-f16-accum.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: mlir-opt %s \
22
// RUN: -transform-interpreter \
33
// RUN: -test-transform-dialect-erase-schedule \
4-
// RUN: -gpu-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
4+
// RUN: -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
55
// RUN: | mlir-cpu-runner \
66
// RUN: --shared-libs=%mlir_cuda_runtime \
77
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/sm80/transform-mma-sync-matmul-f32.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// RUN: mlir-opt %s \
1212
// RUN: -transform-interpreter \
1313
// RUN: -test-transform-dialect-erase-schedule \
14-
// RUN: -gpu-lower-to-nvvm="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
14+
// RUN: -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 cubin-features=+ptx76 cubin-format=%gpu_compilation_format" \
1515
// RUN: | mlir-cpu-runner \
1616
// RUN: --shared-libs=%mlir_cuda_runtime \
1717
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f16.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-chip=sm_70 cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_70 cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32-bare-ptr.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// Similar to the wmma-matmul-f32 but with the memref bare pointer lowering convention.
44
// This test also uses gpu.memcpy operations (instead of gpu.host_register).
55
// RUN: mlir-opt %s \
6-
// RUN: | mlir-opt -gpu-lower-to-nvvm="host-bare-ptr-calling-convention=1 kernel-bare-ptr-calling-convention=1 cubin-chip=sm_70 cubin-format=%gpu_compilation_format" \
6+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="host-bare-ptr-calling-convention=1 kernel-bare-ptr-calling-convention=1 cubin-chip=sm_70 cubin-format=%gpu_compilation_format" \
77
// RUN: | mlir-cpu-runner \
88
// RUN: --shared-libs=%mlir_cuda_runtime \
99
// RUN: --entry-point-result=void \

mlir/test/Integration/GPU/CUDA/TensorCore/wmma-matmul-f32.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-chip=sm_70 cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_70 cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \
@@ -8,7 +8,7 @@
88

99
// Same as above but with the memref bare pointer lowering convention.
1010
// RUN: mlir-opt %s \
11-
// RUN: | mlir-opt -gpu-lower-to-nvvm="kernel-bare-ptr-calling-convention=1 cubin-format=%gpu_compilation_format" \
11+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="kernel-bare-ptr-calling-convention=1 cubin-format=%gpu_compilation_format" \
1212
// RUN: | mlir-cpu-runner \
1313
// RUN: --shared-libs=%mlir_cuda_runtime \
1414
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-maxsi.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-minsi.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

mlir/test/Integration/GPU/CUDA/gpu-to-cubin.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: mlir-opt %s \
2-
// RUN: | mlir-opt -gpu-lower-to-nvvm="cubin-format=%gpu_compilation_format" \
2+
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
33
// RUN: | mlir-cpu-runner \
44
// RUN: --shared-libs=%mlir_cuda_runtime \
55
// RUN: --shared-libs=%mlir_runner_utils \

0 commit comments

Comments
 (0)