Commit ba8ae98

[MLIR] Fixes NVGPU Integration Test Passes Ordering (#69934)
The `test-lower-to-nvvm` pipeline, designed for the NVGPU dialect within GPU kernels, plays an important role in compiling integration tests. This PR restructures the pipeline into three stages (a common pipeline, a GPU-module pipeline, and a host post-GPU pipeline), cleans up the code, and fixes the ordering of the passes. This fix is needed for #69913.
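For reference, the sketch below shows how the three stages introduced by this commit are meant to compose, in the fixed order the commit establishes (common lowering, then `gpu.module`-nested lowering, then the host post-GPU stage). The `PassPipelineRegistration` wrapper and the `registerTestLowerToNVVMSketch` function name are illustrative assumptions, not an excerpt of `TestLowerToNVVM.cpp`:

```cpp
// Sketch only: assumes TestLowerToNVVMOptions and the three build* helpers
// from the diff below are visible in this translation unit; the registration
// function name is hypothetical.
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassRegistry.h"

using namespace mlir;

static void registerTestLowerToNVVMSketch() {
  PassPipelineRegistration<TestLowerToNVVMOptions>(
      "test-lower-to-nvvm",
      "Sketch: lower NVGPU/GPU integration tests to NVVM/LLVM",
      [](OpPassManager &pm, const TestLowerToNVVMOptions &options) {
        // The order fixed by this commit: common lowering first, then the
        // gpu.module-nested lowering, then the host post-GPU stage.
        buildCommonPassPipeline(pm, options);
        buildGpuPassPipeline(pm, options);
        buildHostPostPipeline(pm, options);
      });
}
```

In the restructured pipeline, NVGPU-to-NVVM conversion runs before kernel outlining in the common stage, the `gpu.module`-nested NVVM lowering follows, and GPU-to-LLVM conversion, binary serialization, and final cleanups happen in the host post-GPU stage.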
1 parent cdaaa4d commit ba8ae98

File tree

1 file changed: +55 -227 lines changed

mlir/test/lib/Dialect/GPU/TestLowerToNVVM.cpp

Lines changed: 55 additions & 227 deletions
@@ -28,6 +28,8 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassOptions.h"
@@ -39,27 +41,11 @@ using namespace mlir;
 namespace {
 struct TestLowerToNVVMOptions
     : public PassPipelineOptions<TestLowerToNVVMOptions> {
-  PassOptions::Option<int64_t> hostIndexBitWidth{
-      *this, "host-index-bitwidth",
+  PassOptions::Option<int64_t> indexBitWidth{
+      *this, "index-bitwidth",
       llvm::cl::desc("Bitwidth of the index type for the host (warning this "
                     "should be 64 until the GPU layering is fixed)"),
      llvm::cl::init(64)};
-  PassOptions::Option<bool> hostUseBarePtrCallConv{
-      *this, "host-bare-ptr-calling-convention",
-      llvm::cl::desc(
-          "Whether to use the bareptr calling convention on the host (warning "
-          "this should be false until the GPU layering is fixed)"),
-      llvm::cl::init(false)};
-  PassOptions::Option<int64_t> kernelIndexBitWidth{
-      *this, "kernel-index-bitwidth",
-      llvm::cl::desc("Bitwidth of the index type for the GPU kernels"),
-      llvm::cl::init(64)};
-  PassOptions::Option<bool> kernelUseBarePtrCallConv{
-      *this, "kernel-bare-ptr-calling-convention",
-      llvm::cl::desc(
-          "Whether to use the bareptr calling convention on the kernel "
-          "(warning this should be false until the GPU layering is fixed)"),
-      llvm::cl::init(false)};
   PassOptions::Option<std::string> cubinTriple{
       *this, "cubin-triple",
       llvm::cl::desc("Triple to use to serialize to cubin."),
@@ -74,175 +60,78 @@ struct TestLowerToNVVMOptions
   PassOptions::Option<std::string> cubinFormat{
       *this, "cubin-format",
       llvm::cl::desc("Compilation format to use to serialize to cubin."),
-      llvm::cl::init("isa")};
+      llvm::cl::init("bin")};
   PassOptions::Option<int> optLevel{
       *this, "opt-level",
       llvm::cl::desc("Optimization level for NVVM compilation"),
       llvm::cl::init(2)};
 };
 
+//===----------------------------------------------------------------------===//
+// Common pipeline
+//===----------------------------------------------------------------------===//
+void buildCommonPassPipeline(OpPassManager &pm,
+                             const TestLowerToNVVMOptions &options) {
+  pm.addPass(createConvertNVGPUToNVVMPass());
+  pm.addPass(createGpuKernelOutliningPass());
+  pm.addPass(createConvertLinalgToLoopsPass());
+  pm.addPass(createConvertVectorToSCFPass());
+  pm.addPass(createConvertSCFToCFPass());
+  pm.addPass(createConvertNVVMToLLVMPass());
+  pm.addPass(createConvertVectorToLLVMPass());
+  pm.addPass(createConvertMathToLLVMPass());
+  pm.addPass(createFinalizeMemRefToLLVMConversionPass());
+  pm.addPass(createConvertFuncToLLVMPass());
+  pm.addPass(memref::createExpandStridedMetadataPass());
+
+  GpuNVVMAttachTargetOptions nvvmTargetOptions;
+  nvvmTargetOptions.triple = options.cubinTriple;
+  nvvmTargetOptions.chip = options.cubinChip;
+  nvvmTargetOptions.features = options.cubinFeatures;
+  nvvmTargetOptions.optLevel = options.optLevel;
+  pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
+  pm.addPass(createLowerAffinePass());
+  pm.addPass(createArithToLLVMConversionPass());
+  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
+  convertIndexToLLVMPassOpt.indexBitwidth = options.indexBitWidth;
+  pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
+  pm.addPass(createCanonicalizerPass());
+  pm.addPass(createCSEPass());
+}
+
 //===----------------------------------------------------------------------===//
 // GPUModule-specific stuff.
 //===----------------------------------------------------------------------===//
 void buildGpuPassPipeline(OpPassManager &pm,
                           const TestLowerToNVVMOptions &options) {
   pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+}
 
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertVectorToSCFPass());
-  // Convert SCF to CF (always needed).
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
-  // Convert Math to LLVM (always needed).
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToLLVMPass());
-  // Expand complicated MemRef operations before lowering them.
-  pm.addNestedPass<gpu::GPUModuleOp>(memref::createExpandStridedMetadataPass());
-  // The expansion may create affine expressions. Get rid of them.
-  pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
-
-  // Convert MemRef to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  FinalizeMemRefToLLVMConversionPassOptions
-      finalizeMemRefToLLVMConversionPassOptions;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  finalizeMemRefToLLVMConversionPassOptions.indexBitwidth =
-      options.kernelIndexBitWidth;
-  finalizeMemRefToLLVMConversionPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(createFinalizeMemRefToLLVMConversionPass(
-      finalizeMemRefToLLVMConversionPassOptions));
-
-  // Convert Func to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertFuncToLLVMPassOptions.indexBitwidth = options.kernelIndexBitWidth;
-  convertFuncToLLVMPassOptions.useBarePtrCallConv =
-      options.kernelUseBarePtrCallConv;
-  convertFuncToLLVMPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions));
-
-  // TODO: C++20 designated initializers.
-  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertIndexToLLVMPassOpt.indexBitwidth = options.kernelIndexBitWidth;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
-
-  // TODO: C++20 designated initializers.
-  // The following pass is inconsistent.
-  // TODO: fix inconsistence.
-  ConvertGpuOpsToNVVMOpsOptions convertGpuOpsToNVVMOpsOptions;
-  convertGpuOpsToNVVMOpsOptions.useBarePtrCallConv =
-      options.kernelUseBarePtrCallConv;
-  convertGpuOpsToNVVMOpsOptions.indexBitwidth = options.kernelIndexBitWidth;
-  convertGpuOpsToNVVMOpsOptions.useOpaquePointers = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertGpuOpsToNVVMOps(convertGpuOpsToNVVMOpsOptions));
-
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
-
-  // Convert vector to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertVectorToLLVMPassOptions convertVectorToLLVMPassOptions;
-  convertVectorToLLVMPassOptions.reassociateFPReductions = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertVectorToLLVMPass(convertVectorToLLVMPassOptions));
-
-  // This pass is needed for PTX building
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertNVVMToLLVMPass());
+//===----------------------------------------------------------------------===//
+// Host Post-GPU pipeline
+//===----------------------------------------------------------------------===//
+void buildHostPostPipeline(OpPassManager &pm,
+                           const TestLowerToNVVMOptions &options) {
+  pm.addPass(createGpuToLLVMConversionPass());
 
-  // Sprinkle some cleanups.
+  GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
+  gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
+  pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
-
-  // Finally we can reconcile unrealized casts.
-  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+  pm.addPass(createReconcileUnrealizedCastsPass());
 }
 
 void buildLowerToNVVMPassPipeline(OpPassManager &pm,
                                   const TestLowerToNVVMOptions &options) {
-  // Start with a cleanup pass.
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-
   //===----------------------------------------------------------------------===//
-  // NVGPU lowers device code as well as host code to the driver, so must run
-  // before outlining.
+  // Common pipeline
   //===----------------------------------------------------------------------===//
-  // TODO: C++20 designated initializers.
-  ConvertNVGPUToNVVMPassOptions convertNVGPUToNVVMPassOptions;
-  convertNVGPUToNVVMPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertNVGPUToNVVMPass(convertNVGPUToNVVMPassOptions));
-
-  //===----------------------------------------------------------------------===//
-  // Host-specific stuff.
-  //===----------------------------------------------------------------------===//
-  // Important, must be run at the top-level.
-  pm.addPass(createGpuKernelOutliningPass());
-
-  // Important, all host passes must be run at the func level so that host
-  // conversions can remain with 64 bit indices without polluting the GPU
-  // kernel that may have 32 bit indices.
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
-  // Convert SCF to CF (always needed).
-  pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
-  // Convert Math to LLVM (always needed).
-  pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
-  // Expand complicated MemRef operations before lowering them.
-  pm.addNestedPass<func::FuncOp>(memref::createExpandStridedMetadataPass());
-  // The expansion may create affine expressions. Get rid of them.
-  pm.addNestedPass<func::FuncOp>(createLowerAffinePass());
-
-  // Convert MemRef to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  FinalizeMemRefToLLVMConversionPassOptions
-      finalizeMemRefToLLVMConversionPassOptions;
-  finalizeMemRefToLLVMConversionPassOptions.useAlignedAlloc = true;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  finalizeMemRefToLLVMConversionPassOptions.indexBitwidth =
-      options.hostIndexBitWidth;
-  finalizeMemRefToLLVMConversionPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<func::FuncOp>(createFinalizeMemRefToLLVMConversionPass(
-      finalizeMemRefToLLVMConversionPassOptions));
-
-  // Convert Func to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertFuncToLLVMPassOptions.indexBitwidth = options.hostIndexBitWidth;
-  convertFuncToLLVMPassOptions.useBarePtrCallConv =
-      options.hostUseBarePtrCallConv;
-  convertFuncToLLVMPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions));
-
-  // TODO: C++20 designated initializers.
-  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertIndexToLLVMPassOpt.indexBitwidth = options.hostIndexBitWidth;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
-
-  pm.addNestedPass<func::FuncOp>(createArithToLLVMConversionPass());
-
-  // Sprinkle some cleanups.
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
+  buildCommonPassPipeline(pm, options);
 
   //===----------------------------------------------------------------------===//
   // GPUModule-specific stuff.
@@ -252,68 +141,7 @@ void buildLowerToNVVMPassPipeline(OpPassManager &pm,
   //===----------------------------------------------------------------------===//
   // Host post-GPUModule-specific stuff.
   //===----------------------------------------------------------------------===//
-  // Attach an NVVM target to all the GPU modules with the provided target
-  // options.
-  // TODO: C++20 designated initializers.
-  GpuNVVMAttachTargetOptions nvvmTargetOptions;
-  nvvmTargetOptions.triple = options.cubinTriple;
-  nvvmTargetOptions.chip = options.cubinChip;
-  nvvmTargetOptions.features = options.cubinFeatures;
-  nvvmTargetOptions.optLevel = options.optLevel;
-  pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
-
-  // Convert GPU to LLVM.
-  // TODO: C++20 designated initializers.
-  GpuToLLVMConversionPassOptions gpuToLLVMConversionOptions;
-  // Note: hostBarePtrCallConv must be false for now otherwise
-  // gpu::HostRegister is ill-defined: it wants unranked memrefs but can't
-  // lower the to bare ptr.
-  gpuToLLVMConversionOptions.hostBarePtrCallConv =
-      options.hostUseBarePtrCallConv;
-  gpuToLLVMConversionOptions.kernelBarePtrCallConv =
-      options.kernelUseBarePtrCallConv;
-  gpuToLLVMConversionOptions.useOpaquePointers = true;
-
-  // TODO: something useful here.
-  // gpuToLLVMConversionOptions.gpuBinaryAnnotation = "";
-  pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMConversionOptions));
-
-  // Serialize all GPU modules to binaries.
-  GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
-  gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
-  pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
-
-  // Convert vector to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertVectorToLLVMPassOptions convertVectorToLLVMPassOptions;
-  convertVectorToLLVMPassOptions.reassociateFPReductions = true;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertVectorToLLVMPass(convertVectorToLLVMPassOptions));
-
-  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt3;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertIndexToLLVMPassOpt3.indexBitwidth = options.hostIndexBitWidth;
-  pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt3));
-
-  // Convert Func to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions2;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  convertFuncToLLVMPassOptions2.indexBitwidth = options.hostIndexBitWidth;
-  convertFuncToLLVMPassOptions2.useBarePtrCallConv =
-      options.hostUseBarePtrCallConv;
-  convertFuncToLLVMPassOptions2.useOpaquePointers = true;
-  pm.addPass(createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions2));
-
-  // Sprinkle some cleanups.
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-
-  // Finally we can reconcile unrealized casts.
-  pm.addPass(createReconcileUnrealizedCastsPass());
+  buildHostPostPipeline(pm, options);
 }
 } // namespace
 