Remove the invalid-loop elimination feature (removeInvalidLoop) from affine loop unrolling.

linuxlonelyeagle · linuxlonelyeagle · commit c834f4d70494 · 2025-02-22T18:07:59.000+08:00
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
@@ -43,8 +43,8 @@ void getTripCountMapAndOperands(AffineForOp forOp, AffineMap *map,
 /// constant trip count in non-trivial cases.
 std::optional<uint64_t> getConstantTripCount(AffineForOp forOp);
 
-/// In the GPU, the number of trip of each thread in the loop is inconsistent.
-/// This function returns the maximum number of trip.
+/// In some scenarios, such as on a GPU, the trip count of the loop differs
+/// between threads. This function returns the maximum trip count.
 std::optional<uint64_t> getMaxConstantTripCount(AffineForOp forOp);
 
 /// Returns the greatest known integral divisor of the trip count. Affine
diff --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -86,9 +86,6 @@ LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
 /// was known to have a single iteration.
 LogicalResult promoteIfSingleIteration(AffineForOp forOp);
 
-/// Eliminate loops that will never actually execute.
-LogicalResult removeInvalidLoop(AffineForOp forOp);
-
 /// Promotes all single iteration AffineForOp's in the Function, i.e., moves
 /// their body into the containing Block.
 void promoteSingleIterationLoops(func::FuncOp f);
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td
@@ -1039,7 +1039,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
     /// Find BlockSize via the BlockArgument of gpu.launch.
     Value getBlockSizeOnAxis(Value threadId);
 
-    ///  Find BlockSize via the Dimension Information.
+    /// Find BlockSize via the Dimension Information.
     Value getBlockSizeOnAxis(Dimension dimension);
   }];
 
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -114,23 +114,6 @@ static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
     std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
 }
 
-/// Eliminate loops that will never actually execute
-LogicalResult mlir::affine::removeInvalidLoop(AffineForOp forOp) {
-  std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
-  std::optional<uint64_t> maxTripCount = getMaxConstantTripCount(forOp);
-  if (!tripCount || *tripCount > 0 || !maxTripCount || *maxTripCount > 0)
-    return failure();
-
-  auto iterOperands = forOp.getInits();
-  auto results = forOp.getResults();
-  for (auto [result, operand] : llvm::zip(results, iterOperands))
-    result.replaceAllUsesWith(operand);
-
-  IRRewriter b(forOp);
-  b.eraseOp(forOp);
-  return success();
-}
-
 /// Promotes the loop body of a forOp to its containing block if the forOp
 /// was known to have a single iteration.
 LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) {
@@ -914,12 +897,8 @@ LogicalResult mlir::affine::loopUnrollFull(AffineForOp forOp) {
   uint64_t tripCount = *mayBeConstantTripCount;
   uint64_t maxTripCount = *maxMayBeConstantTripCount;
 
-  // The values of Trip are all 0, and the invalid loop is deleted.
-  if (tripCount <= 0 && maxTripCount <= 0)
-    return removeInvalidLoop(forOp);
-
-  // In special cases, such as in a GPU, only some threads execute this loop.
-  if (tripCount == 0 && maxTripCount == 1)
+  // The trip count is 0, so this loop cannot be unrolled.
+  if (tripCount <= 0)
     return success();
 
   if (tripCount == 1 && maxTripCount == 1)
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
@@ -270,58 +270,40 @@ func.func @thread_partial_execution() {
       %3 = arith.addi %arg, %0 : index
       affine.yield %3 : index
     }
-    // UNROLL-FULL: %{{.*}} = affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
-    // UNROLL-FULL:   %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
-    // UNROLL-FULL:   affine.yield %[[SUM]] : index
-    // UNROLL-FULL: }
+    // UNROLL-FULL: affine.for %{{.*}} = %{{.*}} to 3 step 2 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
+    // UNROLL-FULL-NEXT:   %[[SUM:.*]] = arith.addi %[[ARG]], %[[C0]] : index
+    // UNROLL-FULL-NEXT:   affine.yield %[[SUM]] : index
+    // UNROLL-FULL-NEXT: }
     gpu.terminator
   }
   return
 }
 
-// UNROLL-FULL-LABEL: func @invalid_loop
-func.func @invalid_loop() {
-  %0 = arith.constant 0 :index
-  %1 = arith.constant 2 : index
-  gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
-             threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
-    %threadid = gpu.thread_id x
-    affine.for %iv = %tx to 0 step 2 iter_args(%arg = %0) -> index {
-      %3 = arith.addi %arg, %0 : index
-      affine.yield %3 : index
-    }
-    gpu.terminator
-    // UNROLL-FULL-CHECK: %{{.*}} = gpu.thread_id  x
-    // UNROLL-FULL-CHECK: gpu.terminator
-  }
-  return
-}
-
 // UNROLL-FULL-LABEL: func @unroll_all_thread
 func.func @unroll_all_thread() {
   %0 = arith.constant 0 :index
   %1 = arith.constant 2 : index
-  // UNROLL-FULL-CHECK: %[[C0:.*]] = arith.constant 0 : index
+  // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
   gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
              threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
     %threadid = gpu.thread_id x
     %4 = affine.for %iv = %threadid to 6 step 2 iter_args(%arg = %0) -> index {
       %3 = arith.addi %arg, %0 : index
       affine.yield %3 : index
     }
-    // UNROLL-FULL-CHECK: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
-    // UNROLL-FULL-CHECK: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
-    // UNROLL-FULL-CHECK: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
+    // UNROLL-FULL: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
+    // UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
+    // UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
     gpu.terminator
   }
   return
 }
 
-// UNROLL-FULL-LABEL:   func.func @partial_unroll_factor_4
+// UNROLL-FULL-LABEL: func.func @partial_unroll_factor_4
 func.func @partial_unroll_factor_4() {
   %0 = arith.constant 0 :index
   %1 = arith.constant 2 : index
-  // UNROLL-FULL:           %[[C0:.*]] = arith.constant 0 : index
+  // UNROLL-FULL: %[[C0:.*]] = arith.constant 0 : index
   gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %1, %sz_by = %1, %sz_bz = %1)
              threads(%tx, %ty, %tz) in (%sz_tx = %1, %sz_ty = %1, %sz_tz = %1) {
     %threadid = gpu.thread_id x
@@ -332,13 +314,13 @@ func.func @partial_unroll_factor_4() {
     gpu.terminator
   }
   // UNROLL-FULL: %[[ID:.*]] = gpu.thread_id  x
-  // UNROLL-FULL: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
-  // UNROLL-FULL:   %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
-  // UNROLL-FULL:   %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
-  // UNROLL-FULL:   %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
-  // UNROLL-FULL:   %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
-  // UNROLL-FULL:   affine.yield %[[SUM_3]] : index
-  // UNROLL-FULL: }
+  // UNROLL-FULL-NEXT: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
+  // UNROLL-FULL-NEXT:   %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
+  // UNROLL-FULL-NEXT:   %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
+  // UNROLL-FULL-NEXT:   %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
+  // UNROLL-FULL-NEXT:   %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
+  // UNROLL-FULL-NEXT:   affine.yield %[[SUM_3]] : index
+  // UNROLL-FULL-NEXT: }
   return
 }
 
@@ -800,14 +782,14 @@ func.func @gpu_launch_unroll_by_factor_4() {
     gpu.terminator
   }
   // UNROLL-BY-4: %[[ID:.*]] = gpu.thread_id  x
-  // UNROLL-BY-4: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
-  // UNROLL-BY-4: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
-  // UNROLL-BY-4: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
-  // UNROLL-BY-4: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
-  // UNROLL-BY-4: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
-  // UNROLL-BY-4:   %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
-  // UNROLL-BY-4:   affine.yield %[[SUM_4]] : index
-  // UNROLL-BY-4: }
+  // UNROLL-BY-4-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
+  // UNROLL-BY-4-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
+  // UNROLL-BY-4-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
+  // UNROLL-BY-4-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
+  // UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
+  // UNROLL-BY-4-NEXT:   %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
+  // UNROLL-BY-4-NEXT:   affine.yield %[[SUM_4]] : index
+  // UNROLL-BY-4-NEXT: }
   return
 }