Skip to content

Commit e58e115

Browse files
fix test.
1 parent e31ff46 commit e58e115

File tree

3 files changed

+21
-16
lines changed

3 files changed

+21
-16
lines changed

mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,9 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
293293
// divisors.
294294
assert(map.getNumResults() >= 1 && "expected one or more results");
295295
std::optional<uint64_t> gcd;
296-
for (auto resultExpr : map.getResults()) {
296+
for (unsigned i = 0, e = map.getResults().size(); i < e; ++i) {
297297
uint64_t thisGcd;
298-
AffineMap subMap =
299-
AffineMap::get(map.getNumDims(), map.getNumSymbols(), resultExpr);
298+
AffineMap subMap = map.getSubMap(i);
300299
ValueBoundsConstraintSet::Variable var(subMap, operands);
301300
auto lbBound = ValueBoundsConstraintSet::computeConstantBound(
302301
mlir::presburger::BoundType::LB, var);
@@ -310,7 +309,8 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
310309
thisGcd = tripCount;
311310
} else {
312311
// Trip count is not a known constant; return its largest known divisor.
313-
thisGcd = resultExpr.getLargestKnownDivisor();
312+
thisGcd = map.getResult(i).getLargestKnownDivisor();
313+
;
314314
}
315315
if (gcd.has_value())
316316
gcd = std::gcd(*gcd, thisGcd);

mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1048,7 +1048,10 @@ LogicalResult mlir::affine::loopUnrollByFactor(
10481048
}
10491049

10501050
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
1051-
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
1051+
// A cleanup loop is also needed when the trip count is a range, i.e. its
// constant lower and upper estimates are both known but differ.
1052+
if ((mayBeConstantTripCount && maxMayBeConstantTripCount &&
1053+
*mayBeConstantTripCount != *maxMayBeConstantTripCount) ||
1054+
getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
10521055
// Loops where the lower bound is a max expression or the upper bound is
10531056
// a min expression and the trip count doesn't divide the unroll factor
10541057
// can't be unrolled since the lower bound of the cleanup loop in such cases

mlir/test/Dialect/Affine/unroll.mlir

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// UNROLL-FULL-DAG: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 1)>
1313
// UNROLL-FULL-DAG: [[$MAP5:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 3)>
1414
// UNROLL-FULL-DAG: [[$MAP6:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
15+
// UNROLL-FULL-DAG: [[$MAP7:#map[0-9]*]] = affine_map<()[s0] -> (s0 + (((-s0 + 9) ceildiv 2) floordiv 4) * 8)>
1516

1617
// SHORT-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
1718

@@ -22,8 +23,8 @@
2223
// UNROLL-BY-4-DAG: [[$MAP4:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 + 3)>
2324
// UNROLL-BY-4-DAG: [[$MAP5:#map[0-9]*]] = affine_map<(d0)[s0] -> (d0 + s0 + 1)>
2425
// UNROLL-BY-4-DAG: [[$MAP6:#map[0-9]*]] = affine_map<(d0, d1) -> (d0 * 16 + d1)>
25-
// UNROLL-BY-4-DAG: [[$MAP11:#map[0-9]*]] = affine_map<(d0) -> (d0)>
26-
// UNROLL-BY-4-DAG: [[$MAP7:#map[0-9]*]] = affine_map<()[s0] -> (s0 + (((-s0 + 11) ceildiv 2) floordiv 4) * 8)>
26+
// UNROLL-BY-4-DAG: [[$MAP7:#map[0-9]*]] = affine_map<(d0) -> (d0)>
27+
// UNROLL-BY-4-DAG: [[$MAP8:#map[0-9]*]] = affine_map<()[s0] -> (s0 + (((-s0 + 11) ceildiv 2) floordiv 4) * 8)>
2728

2829
// UNROLL-FULL-LABEL: func @loop_nest_simplest() {
2930
func.func @loop_nest_simplest() {
@@ -314,12 +315,13 @@ func.func @partial_unroll_factor_4() {
314315
gpu.terminator
315316
}
316317
// UNROLL-FULL: %[[ID:.*]] = gpu.thread_id x
317-
// UNROLL-FULL-NEXT: affine.for %{{.*}} = %[[ID]] to 9 step 8 iter_args(%[[ARG:.*]] = %[[C0]]) -> (index) {
318-
// UNROLL-FULL-NEXT: %[[SUM_0:.*]] = arith.addi %[[ARG]], %[[C0]] : index
319-
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
320-
// UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
321-
// UNROLL-FULL-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
322-
// UNROLL-FULL-NEXT: affine.yield %[[SUM_3]] : index
318+
// UNROLL-FULL-NEXT: %[[SUM_0:.*]] = arith.addi %[[C0]], %[[C0]] : index
319+
// UNROLL-FULL-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
320+
// UNROLL-FULL-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
321+
// UNROLL-FULL-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
322+
// UNROLL-FULL-NEXT: affine.for %{{.*}} = [[$MAP7]]()[%[[ID]]] to 9 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
323+
// UNROLL-FULL-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
324+
// UNROLL-FULL-NEXT: affine.yield %[[SUM_4]] : index
323325
// UNROLL-FULL-NEXT: }
324326
return
325327
}
@@ -536,7 +538,7 @@ func.func @loop_nest_operand1() {
536538
// UNROLL-BY-4-LABEL: func @loop_nest_operand2() {
537539
func.func @loop_nest_operand2() {
538540
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
539-
// UNROLL-BY-4-NEXT: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
541+
// UNROLL-BY-4-NEXT: affine.for %arg1 = [[$MAP7]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
540542
// UNROLL-BY-4-NEXT: %0 = "foo"() : () -> i32
541543
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
542544
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
@@ -582,7 +584,7 @@ func.func @floordiv_mod_ub(%M : index, %N : index) {
582584
func.func @loop_nest_operand3() {
583585
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 step 2 {
584586
affine.for %i = 0 to 100 step 2 {
585-
// UNROLL-BY-4: affine.for %arg1 = [[$MAP11]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
587+
// UNROLL-BY-4: affine.for %arg1 = [[$MAP7]](%arg0) to #map{{[0-9]*}}(%arg0) step 4 {
586588
// UNROLL-BY-4-NEXT: %1 = "foo"() : () -> i32
587589
// UNROLL-BY-4-NEXT: %2 = "foo"() : () -> i32
588590
// UNROLL-BY-4-NEXT: %3 = "foo"() : () -> i32
@@ -786,7 +788,7 @@ func.func @gpu_launch_unroll_by_factor_4() {
786788
// UNROLL-BY-4-NEXT: %[[SUM_1:.*]] = arith.addi %[[SUM_0]], %[[C0]] : index
787789
// UNROLL-BY-4-NEXT: %[[SUM_2:.*]] = arith.addi %[[SUM_1]], %[[C0]] : index
788790
// UNROLL-BY-4-NEXT: %[[SUM_3:.*]] = arith.addi %[[SUM_2]], %[[C0]] : index
789-
// UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP7]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
791+
// UNROLL-BY-4-NEXT: affine.for %[[VAL_20:.*]] = [[$MAP8]](){{\[}}%[[ID]]] to 11 step 2 iter_args(%[[ARG:.*]] = %[[SUM_3]]) -> (index) {
790792
// UNROLL-BY-4-NEXT: %[[SUM_4:.*]] = arith.addi %[[ARG]], %[[C0]] : index
791793
// UNROLL-BY-4-NEXT: affine.yield %[[SUM_4]] : index
792794
// UNROLL-BY-4-NEXT: }

0 commit comments

Comments
 (0)