Skip to content

[mlir][GPU] Implement ValueBoundsOpInterface for GPU ID operations #122190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions mlir/include/mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===- ValueBoundsOpInterfaceImpl.h - Impl. of ValueBoundsOpInterface -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
#define MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H

namespace mlir {
// Forward declaration; only a reference to the registry is needed here.
class DialectRegistry;

namespace gpu {
/// Adds to `registry` a GPU dialect extension that attaches the
/// ValueBoundsOpInterface external models for the GPU ID/dimension operations
/// and for the GPU launch operation.
void registerValueBoundsOpInterfaceExternalModels(DialectRegistry &registry);
} // namespace gpu
} // namespace mlir
#endif // MLIR_DIALECT_GPU_IR_VALUEBOUNDSOPINTERFACEIMPL_H
2 changes: 2 additions & 0 deletions mlir/include/mlir/InitAllDialects.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "mlir/Dialect/EmitC/IR/EmitC.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
#include "mlir/Dialect/GPU/Transforms/BufferDeallocationOpInterfaceImpl.h"
#include "mlir/Dialect/IRDL/IR/IRDL.h"
#include "mlir/Dialect/Index/IR/IndexDialect.h"
Expand Down Expand Up @@ -164,6 +165,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
cf::registerBufferizableOpInterfaceExternalModels(registry);
cf::registerBufferDeallocationOpInterfaceExternalModels(registry);
gpu::registerBufferDeallocationOpInterfaceExternalModels(registry);
gpu::registerValueBoundsOpInterfaceExternalModels(registry);
LLVM::registerInlinerInterface(registry);
linalg::registerAllDialectInterfaceImplementations(registry);
linalg::registerRuntimeVerifiableOpInterfaceExternalModels(registry);
Expand Down
3 changes: 2 additions & 1 deletion mlir/lib/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_mlir_dialect_library(MLIRGPUDialect
IR/GPUDialect.cpp
IR/InferIntRangeInterfaceImpls.cpp
IR/ValueBoundsOpInterfaceImpl.cpp

ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
Expand Down Expand Up @@ -40,7 +41,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
Transforms/ShuffleRewriter.cpp
Transforms/SPIRVAttachTarget.cpp
Transforms/SubgroupReduceLowering.cpp

OBJECT

ADDITIONAL_HEADER_DIRS
Expand Down
5 changes: 5 additions & 0 deletions mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Interfaces/FunctionImplementation.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/TypeSwitch.h"
Expand Down Expand Up @@ -217,6 +218,10 @@ void GPUDialect::initialize() {
addInterfaces<GPUInlinerInterface>();
declarePromisedInterface<bufferization::BufferDeallocationOpInterface,
TerminatorOp>();
declarePromisedInterfaces<
ValueBoundsOpInterface, ClusterDimOp, ClusterDimBlocksOp, ClusterIdOp,
ClusterBlockIdOp, BlockDimOp, BlockIdOp, GridDimOp, ThreadIdOp, LaneIdOp,
SubgroupIdOp, GlobalIdOp, NumSubgroupsOp, SubgroupSizeOp, LaunchOp>();
}

static std::string getSparseHandleKeyword(SparseHandleKind kind) {
Expand Down
114 changes: 114 additions & 0 deletions mlir/lib/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
//===- ValueBoundsOpInterfaceImpl.cpp - Impl. of ValueBoundsOpInterface ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Interfaces/InferIntRangeInterface.h"
#include "mlir/Interfaces/ValueBoundsOpInterface.h"

using namespace mlir;
using namespace mlir::gpu;

namespace {
/// Implement ValueBoundsOpInterface (which only works on index-typed values,
/// gathers a set of constraint expressions, and is used for affine analyses)
/// in terms of InferIntRangeInterface (which works on arbitrary integer types,
/// creates [min, max] ranges, and is used for arithmetic simplification).
///
/// `Op` must implement InferIntRangeInterface, produce the bounded value as
/// its first result, and take no operands.
template <typename Op>
struct GpuIdOpInterface
    : public ValueBoundsOpInterface::ExternalModel<GpuIdOpInterface<Op>, Op> {
  /// Adds the signed lower and upper bound that `op` infers for its result
  /// `value` to `cstr` as constant constraints.
  void populateBoundsForIndexValue(Operation *op, Value value,
                                   ValueBoundsConstraintSet &cstr) const {
    auto inferrable = cast<InferIntRangeInterface>(op);
    assert(value == op->getResult(0) &&
           "inferring for value that isn't the GPU op's result");
    // The empty argument-range list passed to inferResultRanges() below is
    // only meaningful for an op without operands, so check that explicitly.
    assert(op->getNumOperands() == 0 && "expected op with no operands");
    auto translateConstraint = [&](Value v, const ConstantIntRanges &range) {
      assert(v == value &&
             "GPU ID op inferring values for something that's not its result");
      cstr.bound(v) >= range.smin().getSExtValue();
      cstr.bound(v) <= range.smax().getSExtValue();
    };
    // No arguments, so we don't need to pass in their ranges.
    inferrable.inferResultRanges({}, translateConstraint);
  }
};

/// External ValueBoundsOpInterface model for the GPU launch op. When `value`
/// is one of the id or size values exposed by the launch (thread/block/cluster
/// ids and block/grid/cluster sizes), it is bounded in terms of the matching
/// size operand of the launch op:
///   * a size value is equal to its operand and at least 1;
///   * an id value is at least 0 and strictly below its size operand.
/// Any other value is left unconstrained.
struct GpuLaunchOpInterface
    : public ValueBoundsOpInterface::ExternalModel<GpuLaunchOpInterface,
                                                   LaunchOp> {
  void populateBoundsForIndexValue(Operation *op, Value value,
                                   ValueBoundsConstraintSet &cstr) const {
    auto launch = cast<LaunchOp>(op);

    // Size operand that bounds `value` (null while no match was found), and
    // whether `value` itself is a size rather than an id.
    Value boundingOperand = nullptr;
    bool valueIsSize = false;

    // Checks `value` against the x/y/z components of `bodyValues` and, on a
    // match, records the same component of `sizeOperands`.
    auto recordIfMatched = [&](KernelDim3 bodyValues, KernelDim3 sizeOperands,
                               bool bodyValuesAreSizes) {
      const Value candidates[] = {bodyValues.x, bodyValues.y, bodyValues.z};
      const Value operands[] = {sizeOperands.x, sizeOperands.y,
                                sizeOperands.z};
      for (unsigned dim = 0; dim < 3; ++dim) {
        if (value != candidates[dim])
          continue;
        boundingOperand = operands[dim];
        valueIsSize = bodyValuesAreSizes;
      }
    };

    const KernelDim3 blockSizeOperands = launch.getBlockSizeOperandValues();
    const KernelDim3 gridSizeOperands = launch.getGridSizeOperandValues();
    recordIfMatched(launch.getThreadIds(), blockSizeOperands,
                    /*bodyValuesAreSizes=*/false);
    recordIfMatched(launch.getBlockSize(), blockSizeOperands,
                    /*bodyValuesAreSizes=*/true);
    recordIfMatched(launch.getBlockIds(), gridSizeOperands,
                    /*bodyValuesAreSizes=*/false);
    recordIfMatched(launch.getGridSize(), gridSizeOperands,
                    /*bodyValuesAreSizes=*/true);
    // Cluster ids/sizes are optional and only queried when present.
    if (launch.hasClusterSize()) {
      const KernelDim3 clusterSizeOperands =
          *launch.getClusterSizeOperandValues();
      recordIfMatched(*launch.getClusterIds(), clusterSizeOperands,
                      /*bodyValuesAreSizes=*/false);
      recordIfMatched(*launch.getClusterSize(), clusterSizeOperands,
                      /*bodyValuesAreSizes=*/true);
    }

    // `value` is not one of the launch's id/size values: nothing to add.
    if (!boundingOperand)
      return;

    if (!valueIsSize) {
      cstr.bound(value) < cstr.getExpr(boundingOperand);
      cstr.bound(value) >= 0;
      return;
    }
    cstr.bound(value) == cstr.getExpr(boundingOperand);
    cstr.bound(value) >= 1;
  }
};
} // namespace

void mlir::gpu::registerValueBoundsOpInterfaceExternalModels(
DialectRegistry &registry) {
registry.addExtension(+[](MLIRContext *ctx, GPUDialect *dialect) {
#define REGISTER(X) X::attachInterface<GpuIdOpInterface<X>>(*ctx);
REGISTER(ClusterDimOp)
REGISTER(ClusterDimBlocksOp)
REGISTER(ClusterIdOp)
REGISTER(ClusterBlockIdOp)
REGISTER(BlockDimOp)
REGISTER(BlockIdOp)
REGISTER(GridDimOp)
REGISTER(ThreadIdOp)
REGISTER(LaneIdOp)
REGISTER(SubgroupIdOp)
REGISTER(GlobalIdOp)
REGISTER(NumSubgroupsOp)
REGISTER(SubgroupSizeOp)
#undef REGISTER

LaunchOp::attachInterface<GpuLaunchOpInterface>(*ctx);
});
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)>
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Affine/value-bounds-reification.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args use-arith-ops" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args use-arith-ops}))' \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-ARITH

// CHECK-LABEL: func @reify_through_chain(
Expand Down
4 changes: 2 additions & 2 deletions mlir/test/Dialect/Arith/value-bounds-op-interface-impl.mlir
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

// RUN: mlir-opt %s -test-affine-reify-value-bounds="use-arith-ops" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{use-arith-ops}))' \
// RUN: -verify-diagnostics -split-input-file | \
// RUN: FileCheck %s --check-prefix=CHECK-ARITH

Expand Down
150 changes: 150 additions & 0 deletions mlir/test/Dialect/GPU/value-bounds-op-interface-impl.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// RUN: mlir-opt %s -pass-pipeline='builtin.module( \
// RUN: func.func(test-affine-reify-value-bounds), \
// RUN: gpu.module(llvm.func(test-affine-reify-value-bounds)), \
// RUN: gpu.module(gpu.func(test-affine-reify-value-bounds)))' \
// RUN: -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @launch_func
// Checks the bounds that gpu.launch provides for its body values: each size
// value equals the corresponding launch operand (and is at least 1), and each
// id value lies in [0, size).
func.func @launch_func(%arg0 : index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %c4 = arith.constant 4 : index
  %c64 = arith.constant 64 : index
  gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %arg0, %grid_dim_y = %c4, %grid_dim_z = %c2)
             threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %c64, %block_dim_y = %c4, %block_dim_z = %c2) {

    // Sanity checks:
    // expected-error @below{{unknown}}
    "test.compare" (%thread_id_x, %c1) {cmp = "EQ"} : (index, index) -> ()
    // expected-remark @below{{false}}
    "test.compare" (%thread_id_x, %c64) {cmp = "GE"} : (index, index) -> ()

    // Grid sizes equal their launch operands.
    // expected-remark @below{{true}}
    "test.compare" (%grid_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare" (%grid_dim_x, %arg0) {cmp = "EQ"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare" (%grid_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare" (%grid_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()

    // Block ids lie in [0, grid size operand).
    // expected-remark @below{{true}}
    "test.compare"(%block_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_x, %arg0) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_z, %c2) {cmp = "LT"} : (index, index) -> ()

    // Block ids are also below the grid size body values themselves.
    // expected-remark @below{{true}}
    "test.compare"(%block_id_x, %grid_dim_x) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_y, %grid_dim_y) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%block_id_z, %grid_dim_z) {cmp = "LT"} : (index, index) -> ()

    // Block sizes equal their launch operands.
    // expected-remark @below{{true}}
    "test.compare" (%block_dim_x, %c64) {cmp = "EQ"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare" (%block_dim_y, %c4) {cmp = "EQ"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare" (%block_dim_z, %c2) {cmp = "EQ"} : (index, index) -> ()

    // Thread ids lie in [0, block size operand).
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_x, %c64) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_y, %c0) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_y, %c4) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_z, %c0) {cmp = "GE"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_z, %c2) {cmp = "LT"} : (index, index) -> ()

    // Thread ids are also below the block size body values themselves.
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_x, %block_dim_x) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_y, %block_dim_y) {cmp = "LT"} : (index, index) -> ()
    // expected-remark @below{{true}}
    "test.compare"(%thread_id_z, %block_dim_z) {cmp = "LT"} : (index, index) -> ()
    gpu.terminator
  }

  func.return
}

// -----

// The tests for the exact range values are located in
// int-range-interface.mlir, so here we just make sure that the results of that
// interface propagate into constraints.

// CHECK-LABEL: func @kernel
// Kernel without any size annotations: the only bounds checked for the thread
// id are that it is non-negative and strictly below 4294967295.
module attributes {gpu.container_module} {
gpu.module @gpu_module {
llvm.func @kernel() attributes {gpu.kernel} {

%c0 = arith.constant 0 : index
// Constant used as the exclusive upper limit in the comparison below.
%ctid_max = arith.constant 4294967295 : index
%thread_id_x = gpu.thread_id x

// expected-remark @below{{true}}
"test.compare" (%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare" (%thread_id_x, %ctid_max) {cmp = "LT"} : (index, index) -> ()
llvm.return
}
}
}

// -----

// CHECK-LABEL: func @annotated_kernel
// Kernel carrying known_block_size / known_grid_size attributes: with a known
// block size of 8 along x, the thread id along x is checked to lie in [0, 8)
// and the block dimension along x is checked to be exactly 8.
module attributes {gpu.container_module} {
gpu.module @gpu_module {
gpu.func @annotated_kernel() kernel
attributes {known_block_size = array<i32: 8, 12, 16>,
known_grid_size = array<i32: 20, 24, 28>} {

%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%thread_id_x = gpu.thread_id x

// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()

%block_dim_x = gpu.block_dim x
// expected-remark @below{{true}}
"test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()

gpu.return
}
}
}

// -----

// CHECK-LABEL: func @local_bounds_kernel
// Kernel whose ops carry a per-op `upper_bound 8`: the block dimension is
// checked to lie in [1, 8] while equality with 8 stays undecidable, and the
// thread id is checked to lie in [0, 8).
module attributes {gpu.container_module} {
gpu.module @gpu_module {
gpu.func @local_bounds_kernel() kernel {

%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c8 = arith.constant 8 : index

%block_dim_x = gpu.block_dim x upper_bound 8
// expected-remark @below{{true}}
"test.compare"(%block_dim_x, %c1) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%block_dim_x, %c8) {cmp = "LE"} : (index, index) -> ()
// expected-error @below{{unknown}}
"test.compare"(%block_dim_x, %c8) {cmp = "EQ"} : (index, index) -> ()

%thread_id_x = gpu.thread_id x upper_bound 8
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c0) {cmp = "GE"} : (index, index) -> ()
// expected-remark @below{{true}}
"test.compare"(%thread_id_x, %c8) {cmp = "LT"} : (index, index) -> ()

gpu.return
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @linalg_fill(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @memref_alloc(
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/SCF/value-bounds-op-interface-impl.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds="reify-to-func-args" \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds{reify-to-func-args}))' \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

// CHECK-LABEL: func @scf_for(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

func.func @unknown_op() -> index {
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Dialect/Vector/test-scalable-bounds.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -cse -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -cse -verify-diagnostics \
// RUN: -verify-diagnostics -split-input-file | FileCheck %s

#map_dim_i = affine_map<(d0)[s0] -> (-d0 + 32400, s0)>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -test-affine-reify-value-bounds -verify-diagnostics \
// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(test-affine-reify-value-bounds))' -verify-diagnostics \
// RUN: -split-input-file | FileCheck %s

// CHECK-LABEL: func @vector_transfer_write(
Expand Down
Loading
Loading