Skip to content

Commit 0aa831e

Browse files
authored
[mlir][GPU] Implement ValueBoundsOpInterface for GPU ID operations (llvm#122190)
The GPU ID operations already implement InferIntRangeInterface, which gives constant lower and upper bounds on those IDs when appropriate metadata is present on the operations or in the surrounding context. This commit uses that existing code to implement the ValueBoundsOpInterface, which is used when analyzing affine operations (unlike the integer range interface, which is used for arithmetic optimization). It also implements the interface for gpu.launch, where we can use it to express the constraint that block/grid sizes are equal to their value from outside the launch op and that the corresponding IDs are bounded above by that size. As a consequence, the test pass for this inference is updated to work on a FunctionOpInterface and not a func.func, creating minor churn in other tests.
1 parent 1b897f7 commit 0aa831e

16 files changed

+317
-15
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//===- ValueBoundsOpInterfaceImpl.h - Impl. of ValueBoundsOpInterface -----===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
10+
#define MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
11+
12+
namespace mlir {
13+
class DialectRegistry;
14+
15+
namespace gpu {
16+
void registerValueBoundsOpInterfaceExternalModels(DialectRegistry &registry);
17+
} // namespace gpu
18+
} // namespace mlir
19+
#endif // MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H

mlir/include/mlir/InitAllDialects.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "mlir/Dialect/EmitC/IR/EmitC.h"
3838
#include "mlir/Dialect/Func/IR/FuncOps.h"
3939
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
40+
#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
4041
#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
4142
#include "mlir/Dialect/IRDL/IR/IRDL.h"
4243
#include "mlir/Dialect/Index/IR/IndexDialect.h"
@@ -164,6 +165,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
164165
cf::registerBufferizableOpInterfaceExternalModels(registry);
165166
cf::registerBufferDeallocationOpInterfaceExternalModels(registry);
166167
gpu::registerBufferDeallocationOpInterfaceExternalModels(registry);
168+
gpu::registerValueBoundsOpInterfaceExternalModels(registry);
167169
LLVM::registerInlinerInterface(registry);
168170
linalg::registerAllDialectInterfaceImplementations(registry);
169171
linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry);

mlir/lib/Dialect/GPU/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
add_mlir_dialect_library(MLIRGPUDialect
22
IR/GPUDialect.cpp
33
IR/InferIntRangeInterfaceImpls.cpp
4+
IR/ValueBoundsOpInterfaceImpl.cpp
45

56
ADDITIONAL_HEADER_DIRS
67
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
@@ -40,7 +41,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
4041
Transforms/ShuffleRewriter.cpp
4142
Transforms/SPIRVAttachTarget.cpp
4243
Transforms/SubgroupReduceLowering.cpp
43-
44+
4445
OBJECT
4546

4647
ADDITIONAL_HEADER_DIRS

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "mlir/IR/TypeUtilities.h"
3030
#include "mlir/Interfaces/FunctionImplementation.h"
3131
#include "mlir/Interfaces/SideEffectInterfaces.h"
32+
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
3233
#include "mlir/Transforms/InliningUtils.h"
3334
#include "llvm/ADT/STLExtras.h"
3435
#include "llvm/ADT/TypeSwitch.h"
@@ -217,6 +218,10 @@ void GPUDialect::initialize() {
217218
addInterfaces<GPUInlinerInterface>();
218219
declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
219220
TerminatorOp>();
221+
declarePromisedInterfaces<
222+
ValueBoundsOpInterface, ClusterDimOp, ClusterDimBlocksOp, ClusterIdOp,
223+
ClusterBlockIdOp, BlockDimOp, BlockIdOp, GridDimOp, ThreadIdOp, LaneIdOp,
224+
SubgroupIdOp, GlobalIdOp, NumSubgroupsOp, SubgroupSizeOp, LaunchOp>();
220225
}
221226

222227
static std::string getSparseHandleKeyword(SparseHandleKind kind) {
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
//===- ValueBoundsOpInterfaceImpl.cpp - Impl. of ValueBoundsOpInterface ---===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
10+
11+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
12+
#include "mlir/Interfaces/InferIntRangeInterface.h"
13+
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
14+
15+
using namespace mlir;
16+
using namespace mlir::gpu;
17+
18+
namespace {
19+
/// Implement ValueBoundsOpInterface (which only works on index-typed values,
20+
/// gathers a set of constraint expressions, and is used for affine analyses)
21+
/// in terms of InferIntRangeInterface (which works
22+
/// on arbitrary integer types, creates [min, max] ranges, and is used in for
23+
/// arithmetic simplification).
24+
template <typename Op>
25+
struct GpuIdOpInterface
26+
: public ValueBoundsOpInterface::ExternalModel<GpuIdOpInterface<Op>, Op> {
27+
void populateBoundsForIndexValue(Operation *op, Value value,
28+
ValueBoundsConstraintSet &cstr) const {
29+
auto inferrable = cast<InferIntRangeInterface>(op);
30+
assert(value == op->getResult(0) &&
31+
"inferring for value that isn't the GPU op's result");
32+
auto translateConstraint = [&](Value v, const ConstantIntRanges &range) {
33+
assert(v == value &&
34+
"GPU ID op inferring values for something that's not its result");
35+
cstr.bound(v) >= range.smin().getSExtValue();
36+
cstr.bound(v) <= range.smax().getSExtValue();
37+
};
38+
assert(inferrable->getNumOperands() == 0 && "ID ops have no operands");
39+
inferrable.inferResultRanges({}, translateConstraint);
40+
}
41+
};
42+
43+
struct GpuLaunchOpInterface
44+
: public ValueBoundsOpInterface::ExternalModel<GpuLaunchOpInterface,
45+
LaunchOp> {
46+
void populateBoundsForIndexValue(Operation *op, Value value,
47+
ValueBoundsConstraintSet &cstr) const {
48+
auto launchOp = cast<LaunchOp>(op);
49+
50+
Value sizeArg = nullptr;
51+
bool isSize = false;
52+
KernelDim3 gridSizeArgs = launchOp.getGridSizeOperandValues();
53+
KernelDim3 blockSizeArgs = launchOp.getBlockSizeOperandValues();
54+
55+
auto match = [&](KernelDim3 bodyArgs, KernelDim3 externalArgs,
56+
bool areSizeArgs) {
57+
if (value == bodyArgs.x) {
58+
sizeArg = externalArgs.x;
59+
isSize = areSizeArgs;
60+
}
61+
if (value == bodyArgs.y) {
62+
sizeArg = externalArgs.y;
63+
isSize = areSizeArgs;
64+
}
65+
if (value == bodyArgs.z) {
66+
sizeArg = externalArgs.z;
67+
isSize = areSizeArgs;
68+
}
69+
};
70+
match(launchOp.getThreadIds(), blockSizeArgs, false);
71+
match(launchOp.getBlockSize(), blockSizeArgs, true);
72+
match(launchOp.getBlockIds(), gridSizeArgs, false);
73+
match(launchOp.getGridSize(), gridSizeArgs, true);
74+
if (launchOp.hasClusterSize()) {
75+
KernelDim3 clusterSizeArgs = *launchOp.getClusterSizeOperandValues();
76+
match(*launchOp.getClusterIds(), clusterSizeArgs, false);
77+
match(*launchOp.getClusterSize(), clusterSizeArgs, true);
78+
}
79+
80+
if (!sizeArg)
81+
return;
82+
if (isSize) {
83+
cstr.bound(value) == cstr.getExpr(sizeArg);
84+
cstr.bound(value) >= 1;
85+
} else {
86+
cstr.bound(value) < cstr.getExpr(sizeArg);
87+
cstr.bound(value) >= 0;
88+
}
89+
}
90+
};
91+
} // namespace
92+
93+
/// Adds an extension to `registry` that attaches the ValueBoundsOpInterface
/// external models defined in this file: GpuIdOpInterface for each GPU
/// ID/size query op listed below, and GpuLaunchOpInterface for gpu.launch.
void mlir::gpu::registerValueBoundsOpInterfaceExternalModels(
    DialectRegistry &registry) {
  registry.addExtension(+[](MLIRContext *ctx, GPUDialect *dialect) {
    // One explicit attachment per op, in the same order as before.
    ClusterDimOp::attachInterface<GpuIdOpInterface<ClusterDimOp>>(*ctx);
    ClusterDimBlocksOp::attachInterface<GpuIdOpInterface<ClusterDimBlocksOp>>(
        *ctx);
    ClusterIdOp::attachInterface<GpuIdOpInterface<ClusterIdOp>>(*ctx);
    ClusterBlockIdOp::attachInterface<GpuIdOpInterface<ClusterBlockIdOp>>(
        *ctx);
    BlockDimOp::attachInterface<GpuIdOpInterface<BlockDimOp>>(*ctx);
    BlockIdOp::attachInterface<GpuIdOpInterface<BlockIdOp>>(*ctx);
    GridDimOp::attachInterface<GpuIdOpInterface<GridDimOp>>(*ctx);
    ThreadIdOp::attachInterface<GpuIdOpInterface<ThreadIdOp>>(*ctx);
    LaneIdOp::attachInterface<GpuIdOpInterface<LaneIdOp>>(*ctx);
    SubgroupIdOp::attachInterface<GpuIdOpInterface<SubgroupIdOp>>(*ctx);
    GlobalIdOp::attachInterface<GpuIdOpInterface<GlobalIdOp>>(*ctx);
    NumSubgroupsOp::attachInterface<GpuIdOpInterface<NumSubgroupsOp>>(*ctx);
    SubgroupSizeOp::attachInterface<GpuIdOpInterface<SubgroupSizeOp>>(*ctx);

    LaunchOp::attachInterface<GpuLaunchOpInterface>(*ctx);
  });
}

mlir/test/Dialect/Affine/value-bounds-op-interface-impl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
22
// RUN: -split-input-file | FileCheck %s
33

44
// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)>

mlir/test/Dialect/Affine/value-bounds-reification.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
22
// RUN: -verify-diagnostics -split-input-file | FileCheck %s
33

4-
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args use-arith-ops" \
4+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args use-arith-ops}))' \
55
// RUN: -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-ARITH
66

77
// CHECK-LABEL: func @reify_through_chain(

mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
22
// RUN: -verify-diagnostics -split-input-file | FileCheck %s
33

4-
// RUN: mlir-opt %s -test-affine-reify-value-bounds="use-arith-ops" \
4+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{use-arith-ops}))' \
55
// RUN: -verify-diagnostics -split-input-file | \
66
// RUN: FileCheck %s --check-prefix=CHECK-ARITH
77

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module( \
2+
// RUN: func.func(test-affine-reify-value-bounds), \
3+
// RUN: gpu.module(llvm.func(test-affine-reify-value-bounds)), \
4+
// RUN: gpu.module(gpu.func(test-affine-reify-value-bounds)))' \
5+
// RUN: -verify-diagnostics \
6+
// RUN: -split-input-file | FileCheck %s
7+
8+
// CHECK-LABEL: func @launch_func
9+
func.func @launch_func(%arg0 : index) {
10+
%c0 = arith.constant 0 : index
11+
%c1 = arith.constant 1 : index
12+
%c2 = arith.constant 2 : index
13+
%c4 = arith.constant 4 : index
14+
%c64 = arith.constant 64 : index
15+
gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %arg0, %grid_dim_y = %c4, %grid_dim_z = %c2)
16+
threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %c64, %block_dim_y = %c4, %block_dim_z = %c2) {
17+
18+
// Sanity checks:
19+
// expected-error @below{{unknown}}
20+
"test.compare" (%thread_id_x, %c1) {cmp = "EQ"} : (index, index) -> ()
21+
// expected-remark @below{{false}}
22+
"test.compare" (%thread_id_x, %c64) {cmp = "GE"} : (index, index) -> ()
23+
24+
// expected-remark @below{{true}}
25+
"test.compare" (%grid_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
26+
// expected-remark @below{{true}}
27+
"test.compare" (%grid_dim_x, %arg0) {cmp = "EQ"} : (index, index) -> ()
28+
// expected-remark @below{{true}}
29+
"test.compare" (%grid_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
30+
// expected-remark @below{{true}}
31+
"test.compare" (%grid_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()
32+
33+
// expected-remark @below{{true}}
34+
"test.compare"(%block_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
35+
// expected-remark @below{{true}}
36+
"test.compare"(%block_id_x, %arg0) {cmp = "LT"} : (index, index) -> ()
37+
// expected-remark @below{{true}}
38+
"test.compare"(%block_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
39+
// expected-remark @below{{true}}
40+
"test.compare"(%block_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
41+
// expected-remark @below{{true}}
42+
"test.compare"(%block_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
43+
// expected-remark @below{{true}}
44+
"test.compare"(%block_id_z, %c2) {cmp = "LT"} : (index, index) -> ()
45+
46+
// expected-remark @below{{true}}
47+
"test.compare" (%block_dim_x, %c64) {cmp = "EQ"} : (index, index) -> ()
48+
// expected-remark @below{{true}}
49+
"test.compare" (%block_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
50+
// expected-remark @below{{true}}
51+
"test.compare" (%block_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()
52+
53+
// expected-remark @below{{true}}
54+
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
55+
// expected-remark @below{{true}}
56+
"test.compare"(%thread_id_x, %c64) {cmp = "LT"} : (index, index) -> ()
57+
// expected-remark @below{{true}}
58+
"test.compare"(%thread_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
59+
// expected-remark @below{{true}}
60+
"test.compare"(%thread_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
61+
// expected-remark @below{{true}}
62+
"test.compare"(%thread_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
63+
// expected-remark @below{{true}}
64+
"test.compare"(%thread_id_z, %c2) {cmp = "LT"} : (index, index) -> ()
65+
66+
// expected-remark @below{{true}}
67+
"test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
68+
gpu.terminator
69+
}
70+
71+
func.return
72+
}
73+
74+
// -----
75+
76+
// The tests for what the ranges are are located in int-range-interface.mlir,
77+
// so here we just make sure that the results of that interface propagate into
78+
// constraints.
79+
80+
// CHECK-LABEL: func @kernel
81+
module attributes {gpu.container_module} {
82+
gpu.module @gpu_module {
83+
llvm.func @kernel() attributes {gpu.kernel} {
84+
85+
%c0 = arith.constant 0 : index
86+
%ctid_max = arith.constant 4294967295 : index
87+
%thread_id_x = gpu.thread_id x
88+
89+
// expected-remark @below{{true}}
90+
"test.compare" (%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
91+
// expected-remark @below{{true}}
92+
"test.compare" (%thread_id_x, %ctid_max) {cmp = "LT"} : (index, index) -> ()
93+
llvm.return
94+
}
95+
}
96+
}
97+
98+
// -----
99+
100+
// CHECK-LABEL: func @annotated_kernel
101+
module attributes {gpu.container_module} {
102+
gpu.module @gpu_module {
103+
gpu.func @annotated_kernel() kernel
104+
attributes {known_block_size = array<i32: 8, 12, 16>,
105+
known_grid_size = array<i32: 20, 24, 28>} {
106+
107+
%c0 = arith.constant 0 : index
108+
%c8 = arith.constant 8 : index
109+
%thread_id_x = gpu.thread_id x
110+
111+
// expected-remark @below{{true}}
112+
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
113+
// expected-remark @below{{true}}
114+
"test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()
115+
116+
%block_dim_x = gpu.block_dim x
117+
// expected-remark @below{{true}}
118+
"test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()
119+
120+
// expected-remark @below{{true}}
121+
"test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
122+
gpu.return
123+
}
124+
}
125+
}
126+
127+
// -----
128+
129+
// CHECK-LABEL: func @local_bounds_kernel
130+
module attributes {gpu.container_module} {
131+
gpu.module @gpu_module {
132+
gpu.func @local_bounds_kernel() kernel {
133+
134+
%c0 = arith.constant 0 : index
135+
%c1 = arith.constant 1 : index
136+
%c8 = arith.constant 8 : index
137+
138+
%block_dim_x = gpu.block_dim x upper_bound 8
139+
// expected-remark @below{{true}}
140+
"test.compare"(%block_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
141+
// expected-remark @below{{true}}
142+
"test.compare"(%block_dim_x, %c8) {cmp = "LE"} : (index, index) -> ()
143+
// expected-error @below{{unknown}}
144+
"test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()
145+
146+
%thread_id_x = gpu.thread_id x upper_bound 8
147+
// expected-remark @below{{true}}
148+
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
149+
// expected-remark @below{{true}}
150+
"test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()
151+
152+
// Note: there isn't a way to express the ID < size constraint
153+
// in this form
154+
// expected-error @below{{unknown}}
155+
"test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
156+
gpu.return
157+
}
158+
}
159+
}

mlir/test/Dialect/Linalg/value-bounds-op-interface-impl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
22
// RUN: -split-input-file | FileCheck %s
33

44
// CHECK-LABEL: func @linalg_fill(

mlir/test/Dialect/MemRef/value-bounds-op-interface-impl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
22
// RUN: -split-input-file | FileCheck %s
33

44
// CHECK-LABEL: func @memref_alloc(

mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
22
// RUN: -verify-diagnostics -split-input-file | FileCheck %s
33

44
// CHECK-LABEL: func @scf_for(

mlir/test/Dialect/Tensor/value-bounds-op-interface-impl.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
1+
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
22
// RUN: -split-input-file | FileCheck %s
33

44
func.func @unknown_op() -> index {

0 commit comments

Comments
 (0)