Skip to content

Commit a807bbe

Browse files
authored
[MLIR][GPUToLLVMSPV] Use llvm.func attributes to convert gpu.shuffle (#116967)
Use `llvm.func`'s `intel_reqd_sub_group_size` attribute instead of SPIR-V environment attributes in the `gpu.shuffle` conversion pattern. This metadata is needed to check that the semantics of the operation are supported, i.e., that it has a constant width whose value is equal to the sub-group size. As the pass also converts `gpu.func` to `llvm.func`, adding a discardable attribute named `intel_reqd_sub_group_size` to the latter is enough for this pattern to work. We no longer have a notion of a "default" sub-group size, so this attribute needs to be set in the parent function for `gpu.shuffle` operations to be converted. Drop the dependency on the SPIR-V dialect, as we no longer need to create attributes from this dialect to lower `gpu.shuffle` instances. --------- Signed-off-by: Victor Perez <[email protected]>
1 parent 66126c3 commit a807bbe

File tree

4 files changed

+17
-97
lines changed

4 files changed

+17
-97
lines changed

mlir/include/mlir/Conversion/Passes.td

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -551,10 +551,7 @@ def LowerHostCodeToLLVMPass : Pass<"lower-host-to-llvm", "ModuleOp"> {
551551
def ConvertGpuOpsToLLVMSPVOps : Pass<"convert-gpu-to-llvm-spv", "gpu::GPUModuleOp"> {
552552
let summary =
553553
"Generate LLVM operations to be ingested by a SPIR-V backend for gpu operations";
554-
let dependentDialects = [
555-
"LLVM::LLVMDialect",
556-
"spirv::SPIRVDialect",
557-
];
554+
let dependentDialects = ["LLVM::LLVMDialect"];
558555
let options = [
559556
Option<"indexBitwidth", "index-bitwidth", "unsigned",
560557
/*default=kDeriveIndexBitwidthFromDataLayout*/"0",

mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,4 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV
1010
MLIRLLVMCommonConversion
1111
MLIRLLVMDialect
1212
MLIRSPIRVAttrToLLVMConversion
13-
MLIRSPIRVDialect
1413
)

mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
2121
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
2222
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
23-
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
2423
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
25-
#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
2624
#include "mlir/IR/BuiltinTypes.h"
2725
#include "mlir/IR/Matchers.h"
2826
#include "mlir/IR/PatternMatch.h"
@@ -274,10 +272,11 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern<gpu::ShuffleOp> {
274272
}
275273

276274
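/// Get the sub-group size required by the parent `llvm.func` (its `intel_reqd_sub_group_size` attribute), or `std::nullopt` if there is no parent `llvm.func` or the attribute is not set.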
277-
static int getSubgroupSize(Operation *op) {
278-
return spirv::lookupTargetEnvOrDefault(op)
279-
.getResourceLimits()
280-
.getSubgroupSize();
275+
static std::optional<int> getSubgroupSize(Operation *op) {
276+
auto parentFunc = op->getParentOfType<LLVM::LLVMFuncOp>();
277+
if (!parentFunc)
278+
return std::nullopt;
279+
return parentFunc.getIntelReqdSubGroupSize();
281280
}
282281

283282
static bool hasValidWidth(gpu::ShuffleOp op) {

mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir

Lines changed: 11 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -227,84 +227,9 @@ gpu.module @barriers {
227227

228228
// -----
229229

230-
// Check `gpu.shuffle` conversion with default subgroup size.
231-
232-
gpu.module @shuffles {
233-
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
234-
// CHECK-SAME-DAG: no_unwind
235-
// CHECK-SAME-DAG: convergent
236-
// CHECK-SAME-DAG: will_return
237-
// CHECK-NOT: memory_effects = #llvm.memory_effects
238-
// CHECK-SAME: }
239-
// CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
240-
// CHECK-SAME-DAG: no_unwind
241-
// CHECK-SAME-DAG: convergent
242-
// CHECK-SAME-DAG: will_return
243-
// CHECK-NOT: memory_effects = #llvm.memory_effects
244-
// CHECK-SAME: }
245-
// CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
246-
// CHECK-SAME-DAG: no_unwind
247-
// CHECK-SAME-DAG: convergent
248-
// CHECK-SAME-DAG: will_return
249-
// CHECK-NOT: memory_effects = #llvm.memory_effects
250-
// CHECK-SAME: }
251-
// CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
252-
// CHECK-SAME-DAG: no_unwind
253-
// CHECK-SAME-DAG: convergent
254-
// CHECK-SAME-DAG: will_return
255-
// CHECK-NOT: memory_effects = #llvm.memory_effects
256-
// CHECK-SAME: }
257-
258-
// CHECK-LABEL: gpu_shuffles
259-
// CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
260-
func.func @gpu_shuffles(%val0: i32, %id: i32,
261-
%val1: i64, %mask: i32,
262-
%val2: f32, %delta_up: i32,
263-
%val3: f64, %delta_down: i32) {
264-
%width = arith.constant 32 : i32
265-
// CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
266-
// CHECK-SAME-DAG: no_unwind
267-
// CHECK-SAME-DAG: convergent
268-
// CHECK-SAME-DAG: will_return
269-
// CHECK-NOT: memory_effects = #llvm.memory_effects
270-
// CHECK-SAME: } : (i32, i32) -> i32
271-
// CHECK: llvm.mlir.constant(true) : i1
272-
// CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]]) {
273-
// CHECK-SAME-DAG: no_unwind
274-
// CHECK-SAME-DAG: convergent
275-
// CHECK-SAME-DAG: will_return
276-
// CHECK-NOT: memory_effects = #llvm.memory_effects
277-
// CHECK-SAME: } : (i64, i32) -> i64
278-
// CHECK: llvm.mlir.constant(true) : i1
279-
// CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]]) {
280-
// CHECK-SAME-DAG: no_unwind
281-
// CHECK-SAME-DAG: convergent
282-
// CHECK-SAME-DAG: will_return
283-
// CHECK-NOT: memory_effects= #llvm.memory_effects
284-
// CHECK-SAME: } : (f32, i32) -> f32
285-
// CHECK: llvm.mlir.constant(true) : i1
286-
// CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]]) {
287-
// CHECK-SAME-DAG: no_unwind
288-
// CHECK-SAME-DAG: convergent
289-
// CHECK-SAME-DAG: will_return
290-
// CHECK-NOT: memory_effects= #llvm.memory_effects
291-
// CHECK-SAME: } : (f64, i32) -> f64
292-
// CHECK: llvm.mlir.constant(true) : i1
293-
%shuffleResult0, %valid0 = gpu.shuffle idx %val0, %id, %width : i32
294-
%shuffleResult1, %valid1 = gpu.shuffle xor %val1, %mask, %width : i64
295-
%shuffleResult2, %valid2 = gpu.shuffle up %val2, %delta_up, %width : f32
296-
%shuffleResult3, %valid3 = gpu.shuffle down %val3, %delta_down, %width : f64
297-
return
298-
}
299-
}
300-
301-
// -----
302-
303230
// Check `gpu.shuffle` conversion with explicit subgroup size.
304231

305-
gpu.module @shuffles attributes {
306-
spirv.target_env = #spirv.target_env<#spirv.vce<v1.4, [Kernel, Addresses, GroupNonUniformShuffle, Int64], []>, #spirv.resource_limits<subgroup_size = 16>>
307-
} {
232+
gpu.module @shuffles {
308233
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
309234
// CHECK-SAME-DAG: no_unwind
310235
// CHECK-SAME-DAG: convergent
@@ -352,15 +277,15 @@ gpu.module @shuffles attributes {
352277
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
353278
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
354279
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
355-
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
356-
func.func @gpu_shuffles(%i8_val: i8,
280+
// CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
281+
llvm.func @gpu_shuffles(%i8_val: i8,
357282
%i16_val: i16,
358283
%i32_val: i32,
359284
%i64_val: i64,
360285
%f16_val: f16,
361286
%f32_val: f32,
362287
%f64_val: f64,
363-
%offset: i32) {
288+
%offset: i32) attributes {intel_reqd_sub_group_size = 16 : i32} {
364289
%width = arith.constant 16 : i32
365290
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
366291
// CHECK: llvm.mlir.constant(true) : i1
@@ -383,7 +308,7 @@ gpu.module @shuffles attributes {
383308
%shuffleResult4, %valid4 = gpu.shuffle up %f16_val, %offset, %width : f16
384309
%shuffleResult5, %valid5 = gpu.shuffle up %f32_val, %offset, %width : f32
385310
%shuffleResult6, %valid6 = gpu.shuffle down %f64_val, %offset, %width : f64
386-
return
311+
llvm.return
387312
}
388313
}
389314

@@ -392,11 +317,11 @@ gpu.module @shuffles attributes {
392317
// Cannot convert due to shuffle width and target subgroup size mismatch
393318

394319
gpu.module @shuffles_mismatch {
395-
func.func @gpu_shuffles(%val: i32, %id: i32) {
320+
llvm.func @gpu_shuffles(%val: i32, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
396321
%width = arith.constant 16 : i32
397322
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
398323
%shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
399-
return
324+
llvm.return
400325
}
401326
}
402327

@@ -405,10 +330,10 @@ gpu.module @shuffles_mismatch {
405330
// Cannot convert due to variable shuffle width
406331

407332
gpu.module @shuffles_mismatch {
408-
func.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) {
333+
llvm.func @gpu_shuffles(%val: i32, %id: i32, %width: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
409334
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
410335
%shuffleResult, %valid = gpu.shuffle idx %val, %id, %width : i32
411-
return
336+
llvm.return
412337
}
413338
}
414339

@@ -417,11 +342,11 @@ gpu.module @shuffles_mismatch {
417342
// Cannot convert due to value type not being supported by the conversion
418343

419344
gpu.module @not_supported_lowering {
420-
func.func @gpu_shuffles(%val: i1, %id: i32) {
345+
llvm.func @gpu_shuffles(%val: i1, %id: i32) attributes {intel_reqd_sub_group_size = 32 : i32} {
421346
%width = arith.constant 32 : i32
422347
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
423348
%shuffleResult, %valid = gpu.shuffle xor %val, %id, %width : i1
424-
return
349+
llvm.return
425350
}
426351
}
427352

0 commit comments

Comments
 (0)