 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassOptions.h"
@@ -39,27 +41,11 @@ using namespace mlir;
 namespace {
 struct TestLowerToNVVMOptions
     : public PassPipelineOptions<TestLowerToNVVMOptions> {
-  PassOptions::Option<int64_t> hostIndexBitWidth{
-      *this, "host-index-bitwidth",
+  PassOptions::Option<int64_t> indexBitWidth{
+      *this, "index-bitwidth",
       llvm::cl::desc("Bitwidth of the index type for the host (warning this "
                      "should be 64 until the GPU layering is fixed)"),
       llvm::cl::init(64)};
-  PassOptions::Option<bool> hostUseBarePtrCallConv{
-      *this, "host-bare-ptr-calling-convention",
-      llvm::cl::desc(
-          "Whether to use the bareptr calling convention on the host (warning "
-          "this should be false until the GPU layering is fixed)"),
-      llvm::cl::init(false)};
-  PassOptions::Option<int64_t> kernelIndexBitWidth{
-      *this, "kernel-index-bitwidth",
-      llvm::cl::desc("Bitwidth of the index type for the GPU kernels"),
-      llvm::cl::init(64)};
-  PassOptions::Option<bool> kernelUseBarePtrCallConv{
-      *this, "kernel-bare-ptr-calling-convention",
-      llvm::cl::desc(
-          "Whether to use the bareptr calling convention on the kernel "
-          "(warning this should be false until the GPU layering is fixed)"),
-      llvm::cl::init(false)};
   PassOptions::Option<std::string> cubinTriple{
       *this, "cubin-triple",
       llvm::cl::desc("Triple to use to serialize to cubin."),
@@ -74,175 +60,78 @@ struct TestLowerToNVVMOptions
   PassOptions::Option<std::string> cubinFormat{
       *this, "cubin-format",
       llvm::cl::desc("Compilation format to use to serialize to cubin."),
-      llvm::cl::init("isa")};
+      llvm::cl::init("bin")};
   PassOptions::Option<int> optLevel{
       *this, "opt-level",
       llvm::cl::desc("Optimization level for NVVM compilation"),
       llvm::cl::init(2)};
 };
 
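For illustration only, a minimal sketch (not part of the patch) of how the trimmed option surface might be driven programmatically. Only TestLowerToNVVMOptions and buildLowerToNVVMPassPipeline (added further down) come from this file; the wrapper function and its use are assumptions.

// Hypothetical driver: populate the simplified options and run the pipeline
// assembled by buildLowerToNVVMPassPipeline below.
static mlir::LogicalResult runTestLowerToNVVM(mlir::ModuleOp module) {
  TestLowerToNVVMOptions options;
  // A single bit width now covers host and kernel code.
  options.indexBitWidth = 64;
  // "bin" is the new default compilation target handed to gpu-module-to-binary.
  options.cubinFormat = "bin";
  mlir::PassManager pm(module.getContext());
  buildLowerToNVVMPassPipeline(pm, options);
  return pm.run(module);
}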
70
+ // ===----------------------------------------------------------------------===//
71
+ // Common pipeline
72
+ // ===----------------------------------------------------------------------===//
73
+ void buildCommonPassPipeline (OpPassManager &pm,
74
+ const TestLowerToNVVMOptions &options) {
75
+ pm.addPass (createConvertNVGPUToNVVMPass ());
76
+ pm.addPass (createGpuKernelOutliningPass ());
77
+ pm.addPass (createConvertLinalgToLoopsPass ());
78
+ pm.addPass (createConvertVectorToSCFPass ());
79
+ pm.addPass (createConvertSCFToCFPass ());
80
+ pm.addPass (createConvertNVVMToLLVMPass ());
81
+ pm.addPass (createConvertVectorToLLVMPass ());
82
+ pm.addPass (createConvertMathToLLVMPass ());
83
+ pm.addPass (createFinalizeMemRefToLLVMConversionPass ());
84
+ pm.addPass (createConvertFuncToLLVMPass ());
85
+ pm.addPass (memref::createExpandStridedMetadataPass ());
86
+
87
+ GpuNVVMAttachTargetOptions nvvmTargetOptions;
88
+ nvvmTargetOptions.triple = options.cubinTriple ;
89
+ nvvmTargetOptions.chip = options.cubinChip ;
90
+ nvvmTargetOptions.features = options.cubinFeatures ;
91
+ nvvmTargetOptions.optLevel = options.optLevel ;
92
+ pm.addPass (createGpuNVVMAttachTarget (nvvmTargetOptions));
93
+ pm.addPass (createLowerAffinePass ());
94
+ pm.addPass (createArithToLLVMConversionPass ());
95
+ ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
96
+ convertIndexToLLVMPassOpt.indexBitwidth = options.indexBitWidth ;
97
+ pm.addPass (createConvertIndexToLLVMPass (convertIndexToLLVMPassOpt));
98
+ pm.addPass (createCanonicalizerPass ());
99
+ pm.addPass (createCSEPass ());
100
+ }
101
+
 //===----------------------------------------------------------------------===//
 // GPUModule-specific stuff.
 //===----------------------------------------------------------------------===//
 void buildGpuPassPipeline(OpPassManager &pm,
                           const TestLowerToNVVMOptions &options) {
   pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+}
 
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertVectorToSCFPass());
-  // Convert SCF to CF (always needed).
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
-  // Convert Math to LLVM (always needed).
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToLLVMPass());
-  // Expand complicated MemRef operations before lowering them.
-  pm.addNestedPass<gpu::GPUModuleOp>(memref::createExpandStridedMetadataPass());
-  // The expansion may create affine expressions. Get rid of them.
-  pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
-
-  // Convert MemRef to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  FinalizeMemRefToLLVMConversionPassOptions
-      finalizeMemRefToLLVMConversionPassOptions;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  finalizeMemRefToLLVMConversionPassOptions.indexBitwidth =
-      options.kernelIndexBitWidth;
-  finalizeMemRefToLLVMConversionPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(createFinalizeMemRefToLLVMConversionPass(
-      finalizeMemRefToLLVMConversionPassOptions));
-
-  // Convert Func to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertFuncToLLVMPassOptions.indexBitwidth = options.kernelIndexBitWidth;
-  convertFuncToLLVMPassOptions.useBarePtrCallConv =
-      options.kernelUseBarePtrCallConv;
-  convertFuncToLLVMPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions));
-
-  // TODO: C++20 designated initializers.
-  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertIndexToLLVMPassOpt.indexBitwidth = options.kernelIndexBitWidth;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
-
-  // TODO: C++20 designated initializers.
-  // The following pass is inconsistent.
-  // TODO: fix inconsistence.
-  ConvertGpuOpsToNVVMOpsOptions convertGpuOpsToNVVMOpsOptions;
-  convertGpuOpsToNVVMOpsOptions.useBarePtrCallConv =
-      options.kernelUseBarePtrCallConv;
-  convertGpuOpsToNVVMOpsOptions.indexBitwidth = options.kernelIndexBitWidth;
-  convertGpuOpsToNVVMOpsOptions.useOpaquePointers = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertGpuOpsToNVVMOps(convertGpuOpsToNVVMOpsOptions));
-
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertSCFToCFPass());
-
-  // Convert vector to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertVectorToLLVMPassOptions convertVectorToLLVMPassOptions;
-  convertVectorToLLVMPassOptions.reassociateFPReductions = true;
-  pm.addNestedPass<gpu::GPUModuleOp>(
-      createConvertVectorToLLVMPass(convertVectorToLLVMPassOptions));
-
-  // This pass is needed for PTX building
-  pm.addNestedPass<gpu::GPUModuleOp>(createConvertNVVMToLLVMPass());
+//===----------------------------------------------------------------------===//
+// Host Post-GPU pipeline
+//===----------------------------------------------------------------------===//
+void buildHostPostPipeline(OpPassManager &pm,
+                           const TestLowerToNVVMOptions &options) {
+  pm.addPass(createGpuToLLVMConversionPass());
 
-  // Sprinkle some cleanups.
+  GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
+  gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
+  pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
   pm.addPass(createCanonicalizerPass());
   pm.addPass(createCSEPass());
-
-  // Finally we can reconcile unrealized casts.
-  pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass());
+  pm.addPass(createReconcileUnrealizedCastsPass());
 }
 
 void buildLowerToNVVMPassPipeline(OpPassManager &pm,
                                   const TestLowerToNVVMOptions &options) {
-  // Start with a cleanup pass.
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-
   //===----------------------------------------------------------------------===//
-  // NVGPU lowers device code as well as host code to the driver, so must run
-  // before outlining.
+  // Common pipeline
   //===----------------------------------------------------------------------===//
-  // TODO: C++20 designated initializers.
-  ConvertNVGPUToNVVMPassOptions convertNVGPUToNVVMPassOptions;
-  convertNVGPUToNVVMPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertNVGPUToNVVMPass(convertNVGPUToNVVMPassOptions));
-
-  //===----------------------------------------------------------------------===//
-  // Host-specific stuff.
-  //===----------------------------------------------------------------------===//
-  // Important, must be run at the top-level.
-  pm.addPass(createGpuKernelOutliningPass());
-
-  // Important, all host passes must be run at the func level so that host
-  // conversions can remain with 64 bit indices without polluting the GPU
-  // kernel that may have 32 bit indices.
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
-  // Convert SCF to CF (always needed).
-  pm.addNestedPass<func::FuncOp>(createConvertSCFToCFPass());
-  // Convert Math to LLVM (always needed).
-  pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
-  // Expand complicated MemRef operations before lowering them.
-  pm.addNestedPass<func::FuncOp>(memref::createExpandStridedMetadataPass());
-  // The expansion may create affine expressions. Get rid of them.
-  pm.addNestedPass<func::FuncOp>(createLowerAffinePass());
-
-  // Convert MemRef to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  FinalizeMemRefToLLVMConversionPassOptions
-      finalizeMemRefToLLVMConversionPassOptions;
-  finalizeMemRefToLLVMConversionPassOptions.useAlignedAlloc = true;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  finalizeMemRefToLLVMConversionPassOptions.indexBitwidth =
-      options.hostIndexBitWidth;
-  finalizeMemRefToLLVMConversionPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<func::FuncOp>(createFinalizeMemRefToLLVMConversionPass(
-      finalizeMemRefToLLVMConversionPassOptions));
-
-  // Convert Func to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertFuncToLLVMPassOptions.indexBitwidth = options.hostIndexBitWidth;
-  convertFuncToLLVMPassOptions.useBarePtrCallConv =
-      options.hostUseBarePtrCallConv;
-  convertFuncToLLVMPassOptions.useOpaquePointers = true;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions));
-
-  // TODO: C++20 designated initializers.
-  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertIndexToLLVMPassOpt.indexBitwidth = options.hostIndexBitWidth;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt));
-
-  pm.addNestedPass<func::FuncOp>(createArithToLLVMConversionPass());
-
-  // Sprinkle some cleanups.
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
+  buildCommonPassPipeline(pm, options);
 
   //===----------------------------------------------------------------------===//
   // GPUModule-specific stuff.
@@ -252,68 +141,7 @@ void buildLowerToNVVMPassPipeline(OpPassManager &pm,
   //===----------------------------------------------------------------------===//
   // Host post-GPUModule-specific stuff.
   //===----------------------------------------------------------------------===//
-  // Attach an NVVM target to all the GPU modules with the provided target
-  // options.
-  // TODO: C++20 designated initializers.
-  GpuNVVMAttachTargetOptions nvvmTargetOptions;
-  nvvmTargetOptions.triple = options.cubinTriple;
-  nvvmTargetOptions.chip = options.cubinChip;
-  nvvmTargetOptions.features = options.cubinFeatures;
-  nvvmTargetOptions.optLevel = options.optLevel;
-  pm.addPass(createGpuNVVMAttachTarget(nvvmTargetOptions));
-
-  // Convert GPU to LLVM.
-  // TODO: C++20 designated initializers.
-  GpuToLLVMConversionPassOptions gpuToLLVMConversionOptions;
-  // Note: hostBarePtrCallConv must be false for now otherwise
-  // gpu::HostRegister is ill-defined: it wants unranked memrefs but can't
-  // lower the to bare ptr.
-  gpuToLLVMConversionOptions.hostBarePtrCallConv =
-      options.hostUseBarePtrCallConv;
-  gpuToLLVMConversionOptions.kernelBarePtrCallConv =
-      options.kernelUseBarePtrCallConv;
-  gpuToLLVMConversionOptions.useOpaquePointers = true;
-
-  // TODO: something useful here.
-  // gpuToLLVMConversionOptions.gpuBinaryAnnotation = "";
-  pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMConversionOptions));
-
-  // Serialize all GPU modules to binaries.
-  GpuModuleToBinaryPassOptions gpuModuleToBinaryPassOptions;
-  gpuModuleToBinaryPassOptions.compilationTarget = options.cubinFormat;
-  pm.addPass(createGpuModuleToBinaryPass(gpuModuleToBinaryPassOptions));
-
-  // Convert vector to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertVectorToLLVMPassOptions convertVectorToLLVMPassOptions;
-  convertVectorToLLVMPassOptions.reassociateFPReductions = true;
-  pm.addNestedPass<func::FuncOp>(
-      createConvertVectorToLLVMPass(convertVectorToLLVMPassOptions));
-
-  ConvertIndexToLLVMPassOptions convertIndexToLLVMPassOpt3;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  // TODO: fix GPU layering.
-  convertIndexToLLVMPassOpt3.indexBitwidth = options.hostIndexBitWidth;
-  pm.addPass(createConvertIndexToLLVMPass(convertIndexToLLVMPassOpt3));
-
-  // Convert Func to LLVM (always needed).
-  // TODO: C++20 designated initializers.
-  ConvertFuncToLLVMPassOptions convertFuncToLLVMPassOptions2;
-  // Must be 64b on the host, things don't compose properly around
-  // gpu::LaunchOp and gpu::HostRegisterOp.
-  convertFuncToLLVMPassOptions2.indexBitwidth = options.hostIndexBitWidth;
-  convertFuncToLLVMPassOptions2.useBarePtrCallConv =
-      options.hostUseBarePtrCallConv;
-  convertFuncToLLVMPassOptions2.useOpaquePointers = true;
-  pm.addPass(createConvertFuncToLLVMPass(convertFuncToLLVMPassOptions2));
-
-  // Sprinkle some cleanups.
-  pm.addPass(createCanonicalizerPass());
-  pm.addPass(createCSEPass());
-
-  // Finally we can reconcile unrealized casts.
-  pm.addPass(createReconcileUnrealizedCastsPass());
+  buildHostPostPipeline(pm, options);
 }
 } // namespace
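If this file follows the usual mlir-opt test pattern, the rebuilt pipeline would be exposed through a PassPipelineRegistration so the options above become command-line flags. A sketch under that assumption; the registration name, description, and wrapper function are not shown in this diff and are assumptions:

// Hypothetical registration: makes buildLowerToNVVMPassPipeline available as a
// textual pipeline whose flags mirror TestLowerToNVVMOptions
// (index-bitwidth, cubin-triple, cubin-chip, cubin-features, cubin-format, opt-level).
void registerTestLowerToNVVM() {
  PassPipelineRegistration<TestLowerToNVVMOptions>(
      "test-lower-to-nvvm",
      "Lowers host and GPU code to NVVM/LLVM and serializes GPU modules.",
      buildLowerToNVVMPassPipeline);
}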