Skip to content

Commit f3b4c00

Browse files
authored
[mlir][gpu] Add builder to gpu.launch_func (#95541)
This patch adds a builder to `gpu.launch_func` that allows the op to be created from a `SymbolRefAttr` instead of a `GPUFuncOp`. This makes it possible to create a `launch_func` when only a `gpu.binary` is present, rather than the full `gpu.module {...}`.
1 parent 0938cdb commit f3b4c00

File tree

2 files changed

+24
-5
lines changed

2 files changed

+24
-5
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,12 @@ def GPU_LaunchFuncOp :GPU_Op<"launch_func", [
653653
CArg<"Type", "nullptr">:$asyncTokenType,
654654
CArg<"ValueRange", "{}">:$asyncDependencies,
655655
CArg<"std::optional<KernelDim3>", "std::nullopt">:$clusterSize)>,
656+
OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize,
657+
"KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
658+
"ValueRange":$kernelOperands,
659+
"Type":$asyncTokenType,
660+
CArg<"ValueRange", "{}">:$asyncDependencies,
661+
CArg<"std::optional<KernelDim3>", "std::nullopt">:$clusterSize)>,
656662
OpBuilder<(ins "SymbolRefAttr":$kernel, "KernelDim3":$gridSize,
657663
"KernelDim3":$blockSize, "Value":$dynamicSharedMemorySize,
658664
"ValueRange":$kernelOperands,

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,11 +1081,13 @@ BlockArgument LaunchOp::addPrivateAttribution(Type type, Location loc) {
10811081
//===----------------------------------------------------------------------===//
10821082

10831083
void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
1084-
GPUFuncOp kernelFunc, KernelDim3 gridSize,
1084+
SymbolRefAttr kernelSymbol, KernelDim3 gridSize,
10851085
KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
10861086
ValueRange kernelOperands, Type asyncTokenType,
10871087
ValueRange asyncDependencies,
10881088
std::optional<KernelDim3> clusterSize) {
1089+
assert(kernelSymbol.getNestedReferences().size() == 1 &&
1090+
"expected a symbol reference with a single nested reference");
10891091
result.addOperands(asyncDependencies);
10901092
if (asyncTokenType)
10911093
result.types.push_back(builder.getType<AsyncTokenType>());
@@ -1098,10 +1100,6 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
10981100
if (dynamicSharedMemorySize)
10991101
result.addOperands(dynamicSharedMemorySize);
11001102
result.addOperands(kernelOperands);
1101-
auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();
1102-
auto kernelSymbol =
1103-
SymbolRefAttr::get(kernelModule.getNameAttr(),
1104-
{SymbolRefAttr::get(kernelFunc.getNameAttr())});
11051103

11061104
Properties &prop = result.getOrAddProperties<Properties>();
11071105
prop.kernel = kernelSymbol;
@@ -1122,6 +1120,21 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
11221120
prop.operandSegmentSizes[segmentSizesLen - 1] = 0;
11231121
}
11241122

1123+
/// Builds a `gpu.launch_func` from the kernel's `GPUFuncOp`.
///
/// The kernel symbol is assembled from the name of the `GPUModuleOp` enclosing
/// `kernelFunc` (root reference) and the name of `kernelFunc` itself (nested
/// reference). Every other argument is forwarded unchanged to the
/// `SymbolRefAttr`-based builder.
void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
                         GPUFuncOp kernelFunc, KernelDim3 gridSize,
                         KernelDim3 getBlockSize, Value dynamicSharedMemorySize,
                         ValueRange kernelOperands, Type asyncTokenType,
                         ValueRange asyncDependencies,
                         std::optional<KernelDim3> clusterSize) {
  auto kernelModule = kernelFunc->getParentOfType<GPUModuleOp>();
  // `getParentOfType` returns a null op when there is no enclosing
  // `gpu.module`; fail with a clear message instead of dereferencing it below.
  assert(kernelModule &&
         "expected the kernel function to be nested in a gpu.module");
  auto kernelSymbol =
      SymbolRefAttr::get(kernelModule.getNameAttr(),
                         {SymbolRefAttr::get(kernelFunc.getNameAttr())});
  build(builder, result, kernelSymbol, gridSize, getBlockSize,
        dynamicSharedMemorySize, kernelOperands, asyncTokenType,
        asyncDependencies, clusterSize);
}
1137+
11251138
void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
11261139
SymbolRefAttr kernel, KernelDim3 gridSize,
11271140
KernelDim3 getBlockSize, Value dynamicSharedMemorySize,

0 commit comments

Comments
 (0)