@@ -270,58 +270,40 @@ func.func @thread_partial_execution() {
270
270
%3 = arith.addi %arg , %0 : index
271
271
affine.yield %3 : index
272
272
}
273
- // UNROLL-FULL: %{{.*}} = affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274
- // UNROLL-FULL: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
275
- // UNROLL-FULL: affine.yield %[[SUM]] : index
276
- // UNROLL-FULL: }
273
+ // UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274
+ // UNROLL-FULL-NEXT: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
275
+ // UNROLL-FULL-NEXT: affine.yield %[[SUM]] : index
276
+ // UNROLL-FULL-NEXT: }
277
277
gpu.terminator
278
278
}
279
279
return
280
280
}
281
281
282
- // UNROLL-FULL-LABEL: func @invalid_loop
283
- func.func @invalid_loop () {
284
- %0 = arith.constant 0 :index
285
- %1 = arith.constant 2 : index
286
- gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
287
- threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
288
- %threadid = gpu.thread_id x
289
- affine.for %iv = %tx to 0 step 2 iter_args (%arg = %0 ) -> index {
290
- %3 = arith.addi %arg , %0 : index
291
- affine.yield %3 : index
292
- }
293
- gpu.terminator
294
- // UNROLL-FULL-CHECK: %{{.*}} = gpu.thread_id x
295
- // UNROLL-FULL-CHECK: gpu.terminator
296
- }
297
- return
298
- }
299
-
300
282
// UNROLL-FULL-LABEL: func @unroll_all_thread
301
283
func.func @unroll_all_thread () {
302
284
%0 = arith.constant 0 :index
303
285
%1 = arith.constant 2 : index
304
- // UNROLL-FULL-CHECK : %[[C0:.*]] = arith.constant 0 : index
286
+ // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
305
287
gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
306
288
threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
307
289
%threadid = gpu.thread_id x
308
290
%4 = affine.for %iv = %threadid to 6 step 2 iter_args (%arg = %0 ) -> index {
309
291
%3 = arith.addi %arg , %0 : index
310
292
affine.yield %3 : index
311
293
}
312
- // UNROLL-FULL-CHECK : %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
313
- // UNROLL-FULL-CHECK : %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
314
- // UNROLL-FULL-CHECK : %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
294
+ // UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
295
+ // UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
296
+ // UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
315
297
gpu.terminator
316
298
}
317
299
return
318
300
}
319
301
320
- // UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
302
+ // UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
321
303
func.func @partial_unroll_factor_4 () {
322
304
%0 = arith.constant 0 :index
323
305
%1 = arith.constant 2 : index
324
- // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
306
+ // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
325
307
gpu.launch blocks (%bx , %by , %bz ) in (%sz_bx = %1 , %sz_by = %1 , %sz_bz = %1 )
326
308
threads (%tx , %ty , %tz ) in (%sz_tx = %1 , %sz_ty = %1 , %sz_tz = %1 ) {
327
309
%threadid = gpu.thread_id x
@@ -332,13 +314,13 @@ func.func @partial_unroll_factor_4() {
332
314
gpu.terminator
333
315
}
334
316
// UNROLL-FULL: %[[ID:.*]] = gpu.thread_id x
335
- // UNROLL-FULL: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
336
- // UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
337
- // UNROLL-FULL: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
338
- // UNROLL-FULL: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
339
- // UNROLL-FULL: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
340
- // UNROLL-FULL: affine.yield %[[SUM_3]] : index
341
- // UNROLL-FULL: }
317
+ // UNROLL-FULL-NEXT: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
318
+ // UNROLL-FULL-NEXT: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
319
+ // UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
320
+ // UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
321
+ // UNROLL-FULL-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
322
+ // UNROLL-FULL-NEXT: affine.yield %[[SUM_3]] : index
323
+ // UNROLL-FULL-NEXT: }
342
324
return
343
325
}
344
326
@@ -800,14 +782,14 @@ func.func @gpu_launch_unroll_by_factor_4() {
800
782
gpu.terminator
801
783
}
802
784
// UNROLL-BY-4: %[[ID:.*]] = gpu.thread_id x
803
- // UNROLL-BY-4: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
804
- // UNROLL-BY-4: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
805
- // UNROLL-BY-4: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
806
- // UNROLL-BY-4: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
807
- // UNROLL-BY-4: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
808
- // UNROLL-BY-4: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
809
- // UNROLL-BY-4: affine.yield %[[SUM_4]] : index
810
- // UNROLL-BY-4: }
785
+ // UNROLL-BY-4-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
786
+ // UNROLL-BY-4-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
787
+ // UNROLL-BY-4-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
788
+ // UNROLL-BY-4-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
789
+ // UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
790
+ // UNROLL-BY-4-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
791
+ // UNROLL-BY-4-NEXT: affine.yield %[[SUM_4]] : index
792
+ // UNROLL-BY-4-NEXT: }
811
793
return
812
794
}
813
795
0 commit comments