[mlir] account for explicit affine.parallel in parallelization (#130812)

ftynse · web-flow · commit 6981f7e92a05 · 2025-03-11T20:53:50.000-05:00
Affine parallelization should take explicitly parallel loops into
account when computing loop depth for dependency analysis purposes. This
was previously not the case, potentially leading to loops incorrectly
being marked as parallel due to depth mismatch.
diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -1988,6 +1988,8 @@ unsigned mlir::affine::getNestingDepth(Operation *op) {
   while ((currOp = currOp->getParentOp())) {
     if (isa<AffineForOp>(currOp))
       depth++;
+    if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
+      depth += parOp.getNumDims();
   }
   return depth;
 }
diff --git a/mlir/test/Dialect/Affine/parallelize.mlir b/mlir/test/Dialect/Affine/parallelize.mlir
@@ -341,3 +341,23 @@ func.func @test_add_inv_or_terminal_symbol(%arg0: memref<9x9xi32>, %arg1: i1) {
   }
   return
 }
+
+// Ensure that outer parallel loops are taken into account when computing the
+// loop depth in dependency analysis during parallelization. With correct
+// depth, the analysis should see the inner loop as sequential due to reads and
+// writes to the same address indexed by the outer (parallel) loop.
+//
+// CHECK-LABEL: @explicit_parallel
+func.func @explicit_parallel(%arg0: memref<1x123x194xf64>, %arg5: memref<34x99x194xf64>) {
+  // CHECK: affine.parallel
+  affine.parallel (%arg7, %arg8) = (0, 0) to (85, 180) {
+    // CHECK: affine.for
+    affine.for %arg9 = 0 to 18 {
+      %0 = affine.load %arg0[0, %arg7 + 19, %arg8 + 7] : memref<1x123x194xf64>
+      %1 = affine.load %arg5[%arg9 + 8, %arg7 + 7, %arg8 + 7] : memref<34x99x194xf64>
+      %2 = arith.addf %0, %1 {fastmathFlags = #llvm.fastmath<none>} : f64
+      affine.store %2, %arg0[0, %arg7 + 19, %arg8 + 7] : memref<1x123x194xf64>  // Store the sum: each %arg9 iteration reads then rewrites the same %arg0 element.
+    }
+  }
+  return
+}

Original file line number	Diff line number	Diff line change
`@@ -1988,6 +1988,8 @@ unsigned mlir::affine::getNestingDepth(Operation *op) {`
`1988`	`1988`	`while ((currOp = currOp->getParentOp())) {`
`1989`	`1989`	`if (isa<AffineForOp>(currOp))`
`1990`	`1990`	`depth++;`
	`1991`	`+ if (auto parOp = dyn_cast<AffineParallelOp>(currOp))`
	`1992`	`+ depth += parOp.getNumDims();`
`1991`	`1993`	`}`
`1992`	`1994`	`return depth;`
`1993`	`1995`	`}`