
Commit 7f19686

j2kun and joker-eph authored
Add a tutorial on mlir-opt (#96105)
This tutorial gives an introduction to the `mlir-opt` tool, focusing on how to run basic passes with and without options, run pass pipelines from the CLI, and point out particularly useful flags.

---------

Co-authored-by: Jeremy Kun <[email protected]>
Co-authored-by: Mehdi Amini <[email protected]>
1 parent c89e9e7 commit 7f19686

File tree

7 files changed, +416 -1 lines changed


mlir/docs/Tutorials/MlirOpt.md

Lines changed: 294 additions & 0 deletions
@@ -0,0 +1,294 @@
# Using `mlir-opt`

`mlir-opt` is a command-line entry point for running passes and lowerings on MLIR code.
This tutorial explains how to use `mlir-opt`, shows some examples of its usage,
and mentions some useful tips for working with it.

Prerequisites:

- [Building MLIR from source](/getting_started/)
- [MLIR Language Reference](/docs/LangRef/)

[TOC]

## `mlir-opt` basics

The `mlir-opt` tool loads textual IR or bytecode into an in-memory structure,
optionally executes a sequence of passes,
and then serializes the IR back out (in textual form by default).
It is intended as a testing and debugging utility.

After building the MLIR project,
the `mlir-opt` binary (located in `build/bin`)
is the entry point for running passes and lowerings,
as well as for emitting debug and diagnostic data.

Running `mlir-opt` with no flags consumes textual or bytecode IR
from standard input, parses it and runs the verifiers on it,
and writes the textual form back to standard output.
This is a good way to test whether an input MLIR file is well-formed.

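For example, the following invocation (passing an input file rather than reading
from standard input) simply parses, verifies, and re-prints one of the example
files used later in this tutorial, without running any passes:

```bash
# Round-trip the file: parse, verify, and print the IR back out.
# A parse or verification error would be reported instead.
build/bin/mlir-opt mlir/test/Examples/mlir-opt/ctlz.mlir
```
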
`mlir-opt --help` shows the complete list of flags
(there are nearly 1000).
Each pass has its own flag,
though it is recommended to use `--pass-pipeline`
to run passes rather than the bare pass flags.

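As an illustration of the difference, the two invocations below are expected to be
equivalent: the first uses the pass's dedicated flag, while the second spells out the
pipeline explicitly (a sketch using the `canonicalize` pass and one of the example
files from later sections):

```bash
# Bare pass flag.
build/bin/mlir-opt --canonicalize mlir/test/Examples/mlir-opt/ctlz.mlir

# Explicit pass pipeline (the recommended form).
build/bin/mlir-opt --pass-pipeline="builtin.module(canonicalize)" \
  mlir/test/Examples/mlir-opt/ctlz.mlir
```
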
## Running a pass

Next we run [`convert-math-to-llvm`](/docs/Passes/#-convert-math-to-llvm),
which lowers the operations of the `math` dialect to the `llvm` dialect,
on the following IR:

```mlir
// mlir/test/Examples/mlir-opt/ctlz.mlir
module {
  func.func @main(%arg0: i32) -> i32 {
    %0 = math.ctlz %arg0 : i32
    func.return %0 : i32
  }
}
```

After building MLIR, and from the `llvm-project` base directory, run

```bash
build/bin/mlir-opt --pass-pipeline="builtin.module(convert-math-to-llvm)" mlir/test/Examples/mlir-opt/ctlz.mlir
```

which produces

```mlir
module {
  func.func @main(%arg0: i32) -> i32 {
    %0 = "llvm.intr.ctlz"(%arg0) <{is_zero_poison = false}> : (i32) -> i32
    return %0 : i32
  }
}
```

Note that `llvm` here is MLIR's `llvm` dialect,
which would still need to be processed by `mlir-translate`
to generate LLVM IR.

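If the goal is LLVM IR proper, the whole module must first be lowered so that the
remaining `func` ops are also in the `llvm` dialect. A minimal sketch of that flow,
assuming the `convert-to-llvm` pass and `mlir-translate`'s `--mlir-to-llvmir` option:

```bash
# Lower all supported dialects to the llvm dialect, then translate the result
# out of MLIR into textual LLVM IR.
build/bin/mlir-opt --pass-pipeline="builtin.module(convert-to-llvm)" \
  mlir/test/Examples/mlir-opt/ctlz.mlir \
  | build/bin/mlir-translate --mlir-to-llvmir
```
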
## Running a pass with options

Next we will show how to run a pass that takes configuration options.
Consider the following IR containing loops with poor cache locality.

```mlir
// mlir/test/Examples/mlir-opt/loop_fusion.mlir
module {
  func.func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
    %0 = memref.alloc() : memref<10xf32>
    %1 = memref.alloc() : memref<10xf32>
    %cst = arith.constant 0.000000e+00 : f32
    affine.for %arg2 = 0 to 10 {
      affine.store %cst, %0[%arg2] : memref<10xf32>
      affine.store %cst, %1[%arg2] : memref<10xf32>
    }
    affine.for %arg2 = 0 to 10 {
      %2 = affine.load %0[%arg2] : memref<10xf32>
      %3 = arith.addf %2, %2 : f32
      affine.store %3, %arg0[%arg2] : memref<10xf32>
    }
    affine.for %arg2 = 0 to 10 {
      %2 = affine.load %1[%arg2] : memref<10xf32>
      %3 = arith.mulf %2, %2 : f32
      affine.store %3, %arg1[%arg2] : memref<10xf32>
    }
    return
  }
}
```

Running this with the [`affine-loop-fusion`](/docs/Passes/#-affine-loop-fusion) pass
produces a fused loop.

```bash
build/bin/mlir-opt --pass-pipeline="builtin.module(affine-loop-fusion)" mlir/test/Examples/mlir-opt/loop_fusion.mlir
```

```mlir
module {
  func.func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
    %alloc = memref.alloc() : memref<1xf32>
    %alloc_0 = memref.alloc() : memref<1xf32>
    %cst = arith.constant 0.000000e+00 : f32
    affine.for %arg2 = 0 to 10 {
      affine.store %cst, %alloc[0] : memref<1xf32>
      affine.store %cst, %alloc_0[0] : memref<1xf32>
      %0 = affine.load %alloc_0[0] : memref<1xf32>
      %1 = arith.mulf %0, %0 : f32
      affine.store %1, %arg1[%arg2] : memref<10xf32>
      %2 = affine.load %alloc[0] : memref<1xf32>
      %3 = arith.addf %2, %2 : f32
      affine.store %3, %arg0[%arg2] : memref<10xf32>
    }
    return
  }
}
```

This pass has options that allow the user to configure its behavior.
For example, the `fusion-compute-tolerance` option
is described as the "fractional increase in additional computation tolerated while fusing."
If this value is set to zero on the command line,
the pass will not fuse the loops.

```bash
build/bin/mlir-opt --pass-pipeline="builtin.module(affine-loop-fusion{fusion-compute-tolerance=0})" \
  mlir/test/Examples/mlir-opt/loop_fusion.mlir
```

```mlir
module {
  func.func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
    %alloc = memref.alloc() : memref<10xf32>
    %alloc_0 = memref.alloc() : memref<10xf32>
    %cst = arith.constant 0.000000e+00 : f32
    affine.for %arg2 = 0 to 10 {
      affine.store %cst, %alloc[%arg2] : memref<10xf32>
      affine.store %cst, %alloc_0[%arg2] : memref<10xf32>
    }
    affine.for %arg2 = 0 to 10 {
      %0 = affine.load %alloc[%arg2] : memref<10xf32>
      %1 = arith.addf %0, %0 : f32
      affine.store %1, %arg0[%arg2] : memref<10xf32>
    }
    affine.for %arg2 = 0 to 10 {
      %0 = affine.load %alloc_0[%arg2] : memref<10xf32>
      %1 = arith.mulf %0, %0 : f32
      affine.store %1, %arg1[%arg2] : memref<10xf32>
    }
    return
  }
}
```

Options passed to a pass
are specified via the syntax `{option1=value1 option2=value2 ...}`,
i.e., as space-separated `key=value` pairs for each option.

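Multiple options can be combined inside the braces. For instance (a sketch, assuming
the pass's `fusion-maximal` option is available alongside `fusion-compute-tolerance`):

```bash
# Two options passed to the same pass, separated by a space inside the braces.
build/bin/mlir-opt \
  --pass-pipeline="builtin.module(affine-loop-fusion{fusion-compute-tolerance=0.5 fusion-maximal=true})" \
  mlir/test/Examples/mlir-opt/loop_fusion.mlir
```
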
## Building a pass pipeline on the command line

The `--pass-pipeline` flag supports combining multiple passes into a pipeline.
So far we have used the trivial pipeline with a single pass
that is "anchored" on the top-level `builtin.module` op.
[Pass anchoring](/docs/PassManagement/#oppassmanager)
is a way for passes to specify
that they only run on particular ops.
While many passes are anchored on `builtin.module`,
if you try to run a pass that is anchored on some other op
inside `--pass-pipeline="builtin.module(pass-name)"`,
it will not run.

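For example, a pass anchored on `func.func` must be nested under `func.func` in the
pipeline string. A sketch of the syntax, using a hypothetical pass name
`some-func-pass` and input file `input.mlir` purely for illustration:

```bash
# Hypothetical: `some-func-pass` stands in for a pass anchored on func.func;
# it must appear inside a func.func(...) nesting level of the pipeline to run.
build/bin/mlir-opt --pass-pipeline="builtin.module(func.func(some-func-pass))" input.mlir
```
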
Multiple passes can be chained together
by providing the pass names in a comma-separated list
in the `--pass-pipeline` string,
e.g.,
`--pass-pipeline="builtin.module(pass1,pass2)"`.
The passes will be run sequentially.

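For example, `cse` and `canonicalize` can both run on any op, so they can be chained
directly at the module level; the following (a simple illustration using one of the
example files above) runs them in order:

```bash
build/bin/mlir-opt --pass-pipeline="builtin.module(cse,canonicalize)" \
  mlir/test/Examples/mlir-opt/loop_fusion.mlir
```
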
To use passes that have nontrivial anchoring,
the appropriate level of nesting must be specified
in the pass pipeline.
For example, consider the following IR, which has the same redundant code
at two different levels of nesting.

```mlir
module {
  module {
    func.func @func1(%arg0: i32) -> i32 {
      %0 = arith.addi %arg0, %arg0 : i32
      %1 = arith.addi %arg0, %arg0 : i32
      %2 = arith.addi %0, %1 : i32
      func.return %2 : i32
    }
  }

  gpu.module @gpu_module {
    gpu.func @func2(%arg0: i32) -> i32 {
      %0 = arith.addi %arg0, %arg0 : i32
      %1 = arith.addi %arg0, %arg0 : i32
      %2 = arith.addi %0, %1 : i32
      gpu.return %2 : i32
    }
  }
}
```

The following pipeline runs `cse` (common subexpression elimination) and `canonicalize`,
but only on the `func.func` nested inside the two `builtin.module` ops,
and then runs `convert-to-llvm` on the inner module.

```bash
build/bin/mlir-opt mlir/test/Examples/mlir-opt/ctlz.mlir --pass-pipeline='
    builtin.module(
        builtin.module(
            func.func(cse,canonicalize),
            convert-to-llvm
        )
    )'
```

The output leaves the `gpu.module` alone:

```mlir
module {
  module {
    llvm.func @func1(%arg0: i32) -> i32 {
      %0 = llvm.add %arg0, %arg0 : i32
      %1 = llvm.add %0, %0 : i32
      llvm.return %1 : i32
    }
  }
  gpu.module @gpu_module {
    gpu.func @func2(%arg0: i32) -> i32 {
      %0 = arith.addi %arg0, %arg0 : i32
      %1 = arith.addi %arg0, %arg0 : i32
      %2 = arith.addi %0, %1 : i32
      gpu.return %2 : i32
    }
  }
}
```

Specifying a pass pipeline with nested anchoring
is also beneficial for performance reasons:
passes with anchoring can run on IR subsets in parallel,
which provides better threaded runtime and cache locality
within threads.
For example,
even if a pass is not restricted to anchor on `func.func`,
running `builtin.module(func.func(cse, canonicalize))`
is more efficient than `builtin.module(cse, canonicalize)`.

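One way to observe this effect (a rough illustration rather than a rigorous benchmark)
is to compare the two pipelines under `--mlir-timing`, which is described in the next
section:

```bash
# Nested pipeline: cse/canonicalize are anchored on func.func.
build/bin/mlir-opt --mlir-timing \
  --pass-pipeline="builtin.module(func.func(cse,canonicalize))" \
  mlir/test/Examples/mlir-opt/loop_fusion.mlir

# Non-nested pipeline: the same passes run directly on the module.
build/bin/mlir-opt --mlir-timing \
  --pass-pipeline="builtin.module(cse,canonicalize)" \
  mlir/test/Examples/mlir-opt/loop_fusion.mlir
```
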
For a spec of the pass-pipeline textual description language,
see [the docs](/docs/PassManagement/#textual-pass-pipeline-specification).
For more general information on pass management, see [Pass Infrastructure](/docs/PassManagement/#).

## Useful CLI flags

- `--debug` prints all debug information produced by `LLVM_DEBUG` calls.
- `--debug-only="my-tag"` prints only the debug information produced by `LLVM_DEBUG`
  in files that have the macro `#define DEBUG_TYPE "my-tag"`.
  This often allows you to print only debug information associated with a specific pass.
    - `"greedy-rewriter"` prints only the debug information
      for patterns applied with the greedy rewriter engine.
    - `"dialect-conversion"` prints only the debug information
      for the dialect conversion framework.
- `--emit-bytecode` emits MLIR in the bytecode format.
- `--mlir-pass-statistics` prints statistics about the passes run.
  These are generated via [pass statistics](/docs/PassManagement/#pass-statistics).
- `--mlir-print-ir-after-all` prints the IR after each pass.
    - See also `--mlir-print-ir-after-change`, `--mlir-print-ir-after-failure`,
      and the analogous versions of these flags with `before` instead of `after`.
    - When using `print-ir` flags, adding `--mlir-print-ir-tree-dir` writes the
      IR to files in a directory tree, making it easier to inspect than a
      large dump to the terminal (see the example after this list).
- `--mlir-timing` displays the execution time of each pass.

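For example, the printing flags can be combined to write the IR after every pass to
its own file under a chosen directory (a minimal sketch; `/tmp/mlir_pass_output` is
just an arbitrary path):

```bash
# Dump the IR after every pass as files under /tmp/mlir_pass_output instead of
# flooding the terminal.
build/bin/mlir-opt --pass-pipeline="builtin.module(affine-loop-fusion)" \
  --mlir-print-ir-after-all --mlir-print-ir-tree-dir=/tmp/mlir_pass_output \
  mlir/test/Examples/mlir-opt/loop_fusion.mlir
```
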
## Further reading

- [List of passes](/docs/Passes/)
- [List of dialects](/docs/Dialects/)

mlir/lib/Pass/PassManagerOptions.cpp

Lines changed: 2 additions & 1 deletion
@@ -61,7 +61,8 @@ struct PassManagerOptions {
llvm::cl::opt<std::string> printTreeDir{
    "mlir-print-ir-tree-dir",
    llvm::cl::desc("When printing the IR before/after a pass, print file "
-                  "tree rooted at this directory")};
+                  "tree rooted at this directory. Use in conjunction with "
+                  "mlir-print-ir-* flags")};

/// Add an IR printing instrumentation if enabled by any 'print-ir' flags.
void addPrinterInstrumentation(PassManager &pm);

mlir/test/Examples/mlir-opt/ctlz.mlir

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
// RUN: mlir-opt --pass-pipeline="builtin.module(convert-to-llvm)" %s | FileCheck %s

// CHECK-LABEL: @main
// CHECK: llvm.intr.ctlz
module {
  func.func @main(%arg0: i32) -> i32 {
    %0 = math.ctlz %arg0 : i32
    func.return %0 : i32
  }
}
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
// RUN: mlir-opt --pass-pipeline='builtin.module(builtin.module(func.func(cse,canonicalize),convert-to-llvm))' %s | FileCheck %s

// CHECK-LABEL: llvm.func @func1
// CHECK-NEXT: llvm.add
// CHECK-NEXT: llvm.add
// CHECK-NEXT: llvm.return
module {
  module {
    func.func @func1(%arg0: i32) -> i32 {
      %0 = arith.addi %arg0, %arg0 : i32
      %1 = arith.addi %arg0, %arg0 : i32
      %2 = arith.addi %0, %1 : i32
      func.return %2 : i32
    }
  }

  // CHECK-LABEL: @gpu_module
  // CHECK-LABEL: gpu.func @func2
  // CHECK-COUNT-3: arith.addi
  // CHECK-NEXT: gpu.return
  gpu.module @gpu_module {
    gpu.func @func2(%arg0: i32) -> i32 {
      %0 = arith.addi %arg0, %arg0 : i32
      %1 = arith.addi %arg0, %arg0 : i32
      %2 = arith.addi %0, %1 : i32
      gpu.return %2 : i32
    }
  }
}
Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
// This file is left in-tree despite having no assertions so it can be
// referenced by the tutorial text.

// RUN: mlir-opt %s

module {
  func.func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
    %0 = memref.alloc() : memref<10xf32>
    %1 = memref.alloc() : memref<10xf32>
    %cst = arith.constant 0.000000e+00 : f32
    affine.for %arg2 = 0 to 10 {
      affine.store %cst, %0[%arg2] : memref<10xf32>
      affine.store %cst, %1[%arg2] : memref<10xf32>
    }
    affine.for %arg2 = 0 to 10 {
      %2 = affine.load %0[%arg2] : memref<10xf32>
      %3 = arith.addf %2, %2 : f32
      affine.store %3, %arg0[%arg2] : memref<10xf32>
    }
    affine.for %arg2 = 0 to 10 {
      %2 = affine.load %1[%arg2] : memref<10xf32>
      %3 = arith.mulf %2, %2 : f32
      affine.store %3, %arg1[%arg2] : memref<10xf32>
    }
    return
  }
}
