Skip to content

Commit c834f4d

Browse files
Delete the feature that removes invalid loops.
1 parent 23b3a7f commit c834f4d

File tree

5 files changed

+30
-72
lines changed

5 files changed

+30
-72
lines changed

mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map,
4343
/// constant trip count in non-trivial cases.
4444
std::optional<uint64_t> getConstantTripCount(AffineForOp forOp);
4545

46-
/// In the GPU, the number of trip of each thread in the loop is inconsistent.
47-
/// This function returns the maximum number of trip.
46+
/// In some scenarios, such as on a GPU, the trip count of the loop differs
47+
/// from thread to thread. This function returns the maximum trip count.
4848
std::optional<uint64_t> getMaxConstantTripCount(AffineForOp forOp);
4949

5050
/// Returns the greatest known integral divisor of the trip count. Affine

mlir/include/mlir/Dialect/Affine/LoopUtils.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,6 @@ LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
8686
/// was known to have a single iteration.
8787
LogicalResult promoteIfSingleIteration(AffineForOp forOp);
8888

89-
/// Eliminate loops that will never actually execute.
90-
LogicalResult removeInvalidLoop(AffineForOp forOp);
91-
9289
/// Promotes all single iteration AffineForOp's in the Function, i.e., moves
9390
/// their body into the containing Block.
9491
void promoteSingleIterationLoops(func::FuncOp f);

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1039,7 +1039,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
10391039
/// Find BlockSize via the BlockArgument of gpu.launch.
10401040
Value getBlockSizeOnAxis(Value threadId);
10411041

1042-
/// Find BlockSize via the Dimension Information.
1042+
/// Find BlockSize via the Dimension Information.
10431043
Value getBlockSizeOnAxis(Dimension dimension);
10441044
}];
10451045

mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -114,23 +114,6 @@ static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
114114
std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
115115
}
116116

117-
/// Eliminate loops that will never actually execute
118-
LogicalResult mlir::affine::removeInvalidLoop(AffineForOp forOp) {
119-
std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
120-
std::optional<uint64_t> maxTripCount = getMaxConstantTripCount(forOp);
121-
if (!tripCount || *tripCount > 0 || !maxTripCount || *maxTripCount > 0)
122-
return failure();
123-
124-
auto iterOperands = forOp.getInits();
125-
auto results = forOp.getResults();
126-
for (auto [result, operand] : llvm::zip(results, iterOperands))
127-
result.replaceAllUsesWith(operand);
128-
129-
IRRewriter b(forOp);
130-
b.eraseOp(forOp);
131-
return success();
132-
}
133-
134117
/// Promotes the loop body of a forOp to its containing block if the forOp
135118
/// was known to have a single iteration.
136119
LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
@@ -914,12 +897,8 @@ LogicalResult mlir::affine::loopUnrollFull(AffineForOp forOp) {
914897
uint64_t tripCount = *mayBeConstantTripCount;
915898
uint64_t maxTripCount = *maxMayBeConstantTripCount;
916899

917-
// The values of Trip are all 0, and the invalid loop is deleted.
918-
if (tripCount <= 0 && maxTripCount <= 0)
919-
return removeInvalidLoop(forOp);
920-
921-
// In special cases, such as in a GPU, only some threads execute this loop.
922-
if (tripCount == 0 && maxTripCount == 1)
900+
// The trip count is 0, so this loop cannot be unrolled.
901+
if (tripCount <= 0)
923902
return success();
924903

925904
if (tripCount == 1 && maxTripCount == 1)

mlir/test/Dialect/Affine/unroll.mlir

Lines changed: 25 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -270,58 +270,40 @@ func.func @thread_partial_execution() {
270270
%3 = arith.addi %arg, %0 : index
271271
affine.yield %3 : index
272272
}
273-
// UNROLL-FULL: %{{.*}} = affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274-
// UNROLL-FULL: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
275-
// UNROLL-FULL: affine.yield %[[SUM]] : index
276-
// UNROLL-FULL: }
273+
// UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
274+
// UNROLL-FULL-NEXT: %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
275+
// UNROLL-FULL-NEXT: affine.yield %[[SUM]] : index
276+
// UNROLL-FULL-NEXT: }
277277
gpu.terminator
278278
}
279279
return
280280
}
281281

282-
// UNROLL-FULL-LABEL: func @invalid_loop
283-
func.func @invalid_loop() {
284-
%0 = arith.constant 0 :index
285-
%1 = arith.constant 2 : index
286-
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
287-
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
288-
%threadid = gpu.thread_id x
289-
affine.for %iv = %tx to 0 step 2 iter_args(%arg = %0) -> index {
290-
%3 = arith.addi %arg, %0 : index
291-
affine.yield %3 : index
292-
}
293-
gpu.terminator
294-
// UNROLL-FULL-CHECK: %{{.*}} = gpu.thread_id x
295-
// UNROLL-FULL-CHECK: gpu.terminator
296-
}
297-
return
298-
}
299-
300282
// UNROLL-FULL-LABEL: func @unroll_all_thread
301283
func.func @unroll_all_thread() {
302284
%0 = arith.constant 0 :index
303285
%1 = arith.constant 2 : index
304-
// UNROLL-FULL-CHECK: %[[C0:.*]] = arith.constant 0 : index
286+
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
305287
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
306288
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
307289
%threadid = gpu.thread_id x
308290
%4 = affine.for %iv = %threadid to 6 step 2 iter_args(%arg = %0) -> index {
309291
%3 = arith.addi %arg, %0 : index
310292
affine.yield %3 : index
311293
}
312-
// UNROLL-FULL-CHECK: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
313-
// UNROLL-FULL-CHECK: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
314-
// UNROLL-FULL-CHECK: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
294+
// UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
295+
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
296+
// UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
315297
gpu.terminator
316298
}
317299
return
318300
}
319301

320-
// UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
302+
// UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
321303
func.func @partial_unroll_factor_4() {
322304
%0 = arith.constant 0 :index
323305
%1 = arith.constant 2 : index
324-
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
306+
// UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
325307
gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
326308
threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
327309
%threadid = gpu.thread_id x
@@ -332,13 +314,13 @@ func.func @partial_unroll_factor_4() {
332314
gpu.terminator
333315
}
334316
// UNROLL-FULL: %[[ID:.*]] = gpu.thread_id x
335-
// UNROLL-FULL: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
336-
// UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
337-
// UNROLL-FULL: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
338-
// UNROLL-FULL: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
339-
// UNROLL-FULL: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
340-
// UNROLL-FULL: affine.yield %[[SUM_3]] : index
341-
// UNROLL-FULL: }
317+
// UNROLL-FULL-NEXT: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
318+
// UNROLL-FULL-NEXT: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
319+
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
320+
// UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
321+
// UNROLL-FULL-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
322+
// UNROLL-FULL-NEXT: affine.yield %[[SUM_3]] : index
323+
// UNROLL-FULL-NEXT: }
342324
return
343325
}
344326

@@ -800,14 +782,14 @@ func.func @gpu_launch_unroll_by_factor_4() {
800782
gpu.terminator
801783
}
802784
// UNROLL-BY-4: %[[ID:.*]] = gpu.thread_id x
803-
// UNROLL-BY-4: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
804-
// UNROLL-BY-4: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
805-
// UNROLL-BY-4: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
806-
// UNROLL-BY-4: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
807-
// UNROLL-BY-4: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
808-
// UNROLL-BY-4: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
809-
// UNROLL-BY-4: affine.yield %[[SUM_4]] : index
810-
// UNROLL-BY-4: }
785+
// UNROLL-BY-4-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
786+
// UNROLL-BY-4-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
787+
// UNROLL-BY-4-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
788+
// UNROLL-BY-4-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
789+
// UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
790+
// UNROLL-BY-4-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
791+
// UNROLL-BY-4-NEXT: affine.yield %[[SUM_4]] : index
792+
// UNROLL-BY-4-NEXT: }
811793
return
812794
}
813795

0 commit comments

Comments
 (0)