@@ -227,84 +227,9 @@ gpu.module @barriers {
227
227
228
228
// -----
229
229
230
- // Check `gpu.shuffle` conversion with default subgroup size.
231
-
232
- gpu.module @shuffles {
233
- // CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
234
- // CHECK-SAME-DAG: no_unwind
235
- // CHECK-SAME-DAG: convergent
236
- // CHECK-SAME-DAG: will_return
237
- // CHECK-NOT: memory_effects = #llvm.memory_effects
238
- // CHECK-SAME: }
239
- // CHECK: llvm.func spir_funccc @_Z20sub_group_shuffle_upfj(f32, i32) -> f32 attributes {
240
- // CHECK-SAME-DAG: no_unwind
241
- // CHECK-SAME-DAG: convergent
242
- // CHECK-SAME-DAG: will_return
243
- // CHECK-NOT: memory_effects = #llvm.memory_effects
244
- // CHECK-SAME: }
245
- // CHECK: llvm.func spir_funccc @_Z21sub_group_shuffle_xorlj(i64, i32) -> i64 attributes {
246
- // CHECK-SAME-DAG: no_unwind
247
- // CHECK-SAME-DAG: convergent
248
- // CHECK-SAME-DAG: will_return
249
- // CHECK-NOT: memory_effects = #llvm.memory_effects
250
- // CHECK-SAME: }
251
- // CHECK: llvm.func spir_funccc @_Z17sub_group_shuffleij(i32, i32) -> i32 attributes {
252
- // CHECK-SAME-DAG: no_unwind
253
- // CHECK-SAME-DAG: convergent
254
- // CHECK-SAME-DAG: will_return
255
- // CHECK-NOT: memory_effects = #llvm.memory_effects
256
- // CHECK-SAME: }
257
-
258
- // CHECK-LABEL: gpu_shuffles
259
- // CHECK-SAME: (%[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: f32, %[[VAL_5:.*]]: i32, %[[VAL_6:.*]]: f64, %[[VAL_7:.*]]: i32)
260
- func.func @gpu_shuffles (%val0: i32 , %id: i32 ,
261
- %val1: i64 , %mask: i32 ,
262
- %val2: f32 , %delta_up: i32 ,
263
- %val3: f64 , %delta_down: i32 ) {
264
- %width = arith.constant 32 : i32
265
- // CHECK: llvm.call spir_funccc @_Z17sub_group_shuffleij(%[[VAL_0]], %[[VAL_1]]) {
266
- // CHECK-SAME-DAG: no_unwind
267
- // CHECK-SAME-DAG: convergent
268
- // CHECK-SAME-DAG: will_return
269
- // CHECK-NOT: memory_effects = #llvm.memory_effects
270
- // CHECK-SAME: } : (i32, i32) -> i32
271
- // CHECK: llvm.mlir.constant(true) : i1
272
- // CHECK: llvm.call spir_funccc @_Z21sub_group_shuffle_xorlj(%[[VAL_2]], %[[VAL_3]]) {
273
- // CHECK-SAME-DAG: no_unwind
274
- // CHECK-SAME-DAG: convergent
275
- // CHECK-SAME-DAG: will_return
276
- // CHECK-NOT: memory_effects = #llvm.memory_effects
277
- // CHECK-SAME: } : (i64, i32) -> i64
278
- // CHECK: llvm.mlir.constant(true) : i1
279
- // CHECK: llvm.call spir_funccc @_Z20sub_group_shuffle_upfj(%[[VAL_4]], %[[VAL_5]]) {
280
- // CHECK-SAME-DAG: no_unwind
281
- // CHECK-SAME-DAG: convergent
282
- // CHECK-SAME-DAG: will_return
283
- // CHECK-NOT: memory_effects= #llvm.memory_effects
284
- // CHECK-SAME: } : (f32, i32) -> f32
285
- // CHECK: llvm.mlir.constant(true) : i1
286
- // CHECK: llvm.call spir_funccc @_Z22sub_group_shuffle_downdj(%[[VAL_6]], %[[VAL_7]]) {
287
- // CHECK-SAME-DAG: no_unwind
288
- // CHECK-SAME-DAG: convergent
289
- // CHECK-SAME-DAG: will_return
290
- // CHECK-NOT: memory_effects= #llvm.memory_effects
291
- // CHECK-SAME: } : (f64, i32) -> f64
292
- // CHECK: llvm.mlir.constant(true) : i1
293
- %shuffleResult0 , %valid0 = gpu.shuffle idx %val0 , %id , %width : i32
294
- %shuffleResult1 , %valid1 = gpu.shuffle xor %val1 , %mask , %width : i64
295
- %shuffleResult2 , %valid2 = gpu.shuffle up %val2 , %delta_up , %width : f32
296
- %shuffleResult3 , %valid3 = gpu.shuffle down %val3 , %delta_down , %width : f64
297
- return
298
- }
299
- }
300
-
301
- // -----
302
-
303
230
// Check `gpu.shuffle` conversion with explicit subgroup size.
304
231
305
- gpu.module @shuffles attributes {
306
- spirv.target_env = #spirv.target_env <#spirv.vce <v1.4 , [Kernel , Addresses , GroupNonUniformShuffle , Int64 ], []>, #spirv.resource_limits <subgroup_size = 16 >>
307
- } {
232
+ gpu.module @shuffles {
308
233
// CHECK: llvm.func spir_funccc @_Z22sub_group_shuffle_downdj(f64, i32) -> f64 attributes {
309
234
// CHECK-SAME-DAG: no_unwind
310
235
// CHECK-SAME-DAG: convergent
@@ -352,15 +277,15 @@ gpu.module @shuffles attributes {
352
277
// CHECK-SAME: (%[[I8_VAL:.*]]: i8, %[[I16_VAL:.*]]: i16,
353
278
// CHECK-SAME: %[[I32_VAL:.*]]: i32, %[[I64_VAL:.*]]: i64,
354
279
// CHECK-SAME: %[[F16_VAL:.*]]: f16, %[[F32_VAL:.*]]: f32,
355
- // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32) {
356
- func .func @gpu_shuffles (%i8_val: i8 ,
280
+ // CHECK-SAME: %[[F64_VAL:.*]]: f64, %[[OFFSET:.*]]: i32)
281
+ llvm .func @gpu_shuffles (%i8_val: i8 ,
357
282
%i16_val: i16 ,
358
283
%i32_val: i32 ,
359
284
%i64_val: i64 ,
360
285
%f16_val: f16 ,
361
286
%f32_val: f32 ,
362
287
%f64_val: f64 ,
363
- %offset: i32 ) {
288
+ %offset: i32 ) attributes { intel_reqd_sub_group_size = 16 : i32 } {
364
289
%width = arith.constant 16 : i32
365
290
// CHECK: llvm.call spir_funccc @_Z17sub_group_shufflecj(%[[I8_VAL]], %[[OFFSET]])
366
291
// CHECK: llvm.mlir.constant(true) : i1
@@ -383,7 +308,7 @@ gpu.module @shuffles attributes {
383
308
%shuffleResult4 , %valid4 = gpu.shuffle up %f16_val , %offset , %width : f16
384
309
%shuffleResult5 , %valid5 = gpu.shuffle up %f32_val , %offset , %width : f32
385
310
%shuffleResult6 , %valid6 = gpu.shuffle down %f64_val , %offset , %width : f64
386
- return
311
+ llvm. return
387
312
}
388
313
}
389
314
@@ -392,11 +317,11 @@ gpu.module @shuffles attributes {
392
317
// Cannot convert: the shuffle width (16) does not match the required subgroup size (32).
393
318
394
319
gpu.module @shuffles_mismatch {
395
- func .func @gpu_shuffles (%val: i32 , %id: i32 ) {
320
+ llvm .func @gpu_shuffles (%val: i32 , %id: i32 ) attributes { intel_reqd_sub_group_size = 32 : i32 } {
396
321
%width = arith.constant 16 : i32
397
322
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
398
323
%shuffleResult , %valid = gpu.shuffle idx %val , %id , %width : i32
399
- return
324
+ llvm. return
400
325
}
401
326
}
402
327
@@ -405,10 +330,10 @@ gpu.module @shuffles_mismatch {
405
330
// Cannot convert: the shuffle width is a function argument rather than a compile-time constant.
406
331
407
332
gpu.module @shuffles_mismatch {
408
- func .func @gpu_shuffles (%val: i32 , %id: i32 , %width: i32 ) {
333
+ llvm .func @gpu_shuffles (%val: i32 , %id: i32 , %width: i32 ) attributes { intel_reqd_sub_group_size = 32 : i32 } {
409
334
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
410
335
%shuffleResult , %valid = gpu.shuffle idx %val , %id , %width : i32
411
- return
336
+ llvm. return
412
337
}
413
338
}
414
339
@@ -417,11 +342,11 @@ gpu.module @shuffles_mismatch {
417
342
// Cannot convert: the shuffled value type (i1) is not supported by the conversion.
418
343
419
344
gpu.module @not_supported_lowering {
420
- func .func @gpu_shuffles (%val: i1 , %id: i32 ) {
345
+ llvm .func @gpu_shuffles (%val: i1 , %id: i32 ) attributes { intel_reqd_sub_group_size = 32 : i32 } {
421
346
%width = arith.constant 32 : i32
422
347
// expected-error@below {{failed to legalize operation 'gpu.shuffle' that was explicitly marked illegal}}
423
348
%shuffleResult , %valid = gpu.shuffle xor %val , %id , %width : i1
424
- return
349
+ llvm. return
425
350
}
426
351
}
427
352
0 commit comments