Skip to content

Commit 0756336

Browse files
Add method to normalize scf.forall op.
1 parent e4ecd3c commit 0756336

File tree

10 files changed

+199
-81
lines changed

10 files changed

+199
-81
lines changed

mlir/include/mlir/Dialect/SCF/Utils/Utils.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,13 @@ scf::ForallOp fuseIndependentSiblingForallLoops(scf::ForallOp target,
203203
scf::ForOp fuseIndependentSiblingForLoops(scf::ForOp target, scf::ForOp source,
204204
RewriterBase &rewriter);
205205

206+
/// Normalize an `scf.forall` operation. Returns `failure()` if normalization fails.
207+
/// On `success()` returns the
208+
/// newly created operation with all uses of the original operation replaced
209+
/// with results of the new operation.
210+
FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
211+
scf::ForallOp forallOp);
212+
206213
} // namespace mlir
207214

208215
#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_

mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp

Lines changed: 100 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "mlir/Dialect/Affine/IR/AffineOps.h"
1414
#include "mlir/Dialect/Arith/IR/Arith.h"
15+
#include "mlir/Dialect/Arith/Utils/Utils.h"
1516
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
1617
#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
1718
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
@@ -2914,6 +2915,94 @@ void transform::TileUsingForallOp::build(OpBuilder &builder,
29142915
/*mapping=*/mapping);
29152916
}
29162917

2918+
/// Given `lbs`, `ubs` and `steps` of loops, return (for each loop), the
2919+
/// normalized upper bound.
2920+
static SmallVector<OpFoldResult>
2921+
normalizeUpperBounds(RewriterBase &rewriter, Location loc,
2922+
ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
2923+
ArrayRef<OpFoldResult> steps) {
2924+
AffineExpr s0, s1, s2;
2925+
bindSymbols(rewriter.getContext(), s0, s1, s2);
2926+
AffineExpr normalizedUbExpr = (s1 - s0).ceilDiv(s2);
2927+
SmallVector<OpFoldResult> normalizedUbs;
2928+
for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
2929+
OpFoldResult normalizedUb = affine::makeComposedFoldedAffineApply(
2930+
rewriter, loc, normalizedUbExpr, {lb, ub, step});
2931+
normalizedUbs.push_back(normalizedUb);
2932+
}
2933+
return normalizedUbs;
2934+
}
2935+
2936+
/// When a loop is normalized, the uses of the induction variable within the
2937+
/// loop need to be replaced with `original_lb + old_iv * original_step`.
2938+
static SmallVector<Value> denormalizeIndVar(RewriterBase &rewriter,
2939+
Location loc, ValueRange ivs,
2940+
ArrayRef<OpFoldResult> lbs,
2941+
ArrayRef<OpFoldResult> steps) {
2942+
AffineExpr s0, s1;
2943+
AffineExpr d0;
2944+
bindSymbols(rewriter.getContext(), s0, s1);
2945+
bindDims(rewriter.getContext(), d0);
2946+
AffineExpr denormExpr = s0 + d0 * s1;
2947+
SmallVector<Value> denormalizedIvs;
2948+
2949+
for (auto [iv, lb, step] : llvm::zip_equal(ivs, lbs, steps)) {
2950+
OpFoldResult denormValue = affine::makeComposedFoldedAffineApply(
2951+
rewriter, loc, denormExpr, ArrayRef<OpFoldResult>{iv, lb, step});
2952+
denormalizedIvs.push_back(
2953+
getValueOrCreateConstantIndexOp(rewriter, loc, denormValue));
2954+
}
2955+
return denormalizedIvs;
2956+
}
2957+
2958+
/// Given a `scf.forall` loop return a loop op with the loop bounds
2959+
/// normalized.
2960+
/// TODO: Replace this with a general utility to normalize `scf.forall`.
2961+
/// At the time of writing, this wasn't done since adding this to `scf`
2962+
/// dialect would disallow use of `affine.apply` operations due
2963+
/// to cyclic dependencies. To avoid churn in lit tests
2964+
/// with the change this was added with, defer that to a follow up.
2965+
static scf::ForallOp normalizeForallLoopOp(RewriterBase &rewriter,
2966+
scf::ForallOp loop) {
2967+
SmallVector<OpFoldResult> lbs = loop.getMixedLowerBound();
2968+
SmallVector<OpFoldResult> ubs = loop.getMixedUpperBound();
2969+
SmallVector<OpFoldResult> steps = loop.getMixedStep();
2970+
2971+
if (llvm::all_of(
2972+
lbs, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); }) &&
2973+
llvm::all_of(
2974+
steps, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 1); })) {
2975+
return loop;
2976+
}
2977+
2978+
Location loc = loop.getLoc();
2979+
SmallVector<OpFoldResult> normalizedUbs =
2980+
normalizeUpperBounds(rewriter, loc, lbs, ubs, steps);
2981+
SmallVector<OpFoldResult> normalizedLbs(normalizedUbs.size(),
2982+
rewriter.getIndexAttr(0));
2983+
SmallVector<OpFoldResult> normalizedSteps(normalizedUbs.size(),
2984+
rewriter.getIndexAttr(1));
2985+
2986+
auto normalizedForallOp = rewriter.create<scf::ForallOp>(
2987+
loc, normalizedLbs, normalizedUbs, normalizedSteps, loop.getOutputs(),
2988+
loop.getMapping(), [](OpBuilder &, Location, ValueRange) {});
2989+
2990+
auto normalizedLoopIvs = normalizedForallOp.getInductionVars();
2991+
OpBuilder::InsertionGuard g(rewriter);
2992+
Block *normalizedLoopBlock = normalizedForallOp.getBody();
2993+
rewriter.setInsertionPointToStart(normalizedLoopBlock);
2994+
2995+
SmallVector<Value> argValues =
2996+
denormalizeIndVar(rewriter, loc, normalizedLoopIvs, lbs, steps);
2997+
argValues.append(normalizedForallOp.getRegionIterArgs().begin(),
2998+
normalizedForallOp.getRegionIterArgs().end());
2999+
Block *origLoopBlock = loop.getBody();
3000+
rewriter.mergeBlocks(origLoopBlock, normalizedLoopBlock, argValues);
3001+
3002+
rewriter.replaceOp(loop, normalizedForallOp);
3003+
return normalizedForallOp;
3004+
}
3005+
29173006
DiagnosedSilenceableFailure transform::tileToForallOpImpl(
29183007
RewriterBase &rewriter, transform::TransformState &state,
29193008
TransformOpInterface transformOp, Operation *target,
@@ -2935,23 +3024,6 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
29353024
if (!mixedNumThreads.empty()) {
29363025
options.setNumThreads(mixedNumThreads);
29373026
} else {
2938-
SmallVector<Range> loopRanges = tileableOp.getIterationDomain(rewriter);
2939-
unsigned nLoops = loopRanges.size();
2940-
SmallVector<OpFoldResult> numThreads;
2941-
numThreads.reserve(nLoops);
2942-
AffineExpr s0, s1;
2943-
bindSymbols(rewriter.getContext(), s0, s1);
2944-
AffineExpr divExpr = s0.ceilDiv(s1);
2945-
for (int i = 0, e = std::min(mixedTileSizes.size(), loopRanges.size());
2946-
i < e; ++i) {
2947-
OpFoldResult numTiles = mixedTileSizes[i];
2948-
if (!isConstantIntValue(numTiles, 0))
2949-
numTiles = affine::makeComposedFoldedAffineApply(
2950-
rewriter, tileableOp.getLoc(), divExpr,
2951-
{loopRanges[i].size, numTiles});
2952-
numThreads.push_back(numTiles);
2953-
}
2954-
options.setNumThreads(numThreads);
29553027
options.setTileSizes(mixedTileSizes);
29563028
}
29573029
if (mapping) {
@@ -2962,9 +3034,20 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
29623034

29633035
if (failed(maybeTilingResult))
29643036
return transformOp.emitDefaultSilenceableFailure(tileableOp);
3037+
29653038
rewriter.replaceOp(tileableOp, maybeTilingResult->replacements);
29663039

29673040
tilingResult = *maybeTilingResult;
3041+
3042+
if (mixedNumThreads.empty()) {
3043+
auto generatedForallOp = cast<scf::ForallOp>(tilingResult.loops.front());
3044+
OpBuilder::InsertionGuard g(rewriter);
3045+
rewriter.setInsertionPoint(generatedForallOp);
3046+
scf::ForallOp normalizedForallOp =
3047+
normalizeForallLoopOp(rewriter, generatedForallOp);
3048+
tilingResult.loops.front() = normalizedForallOp;
3049+
}
3050+
29683051
return DiagnosedSilenceableFailure::success();
29693052
}
29703053

mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,10 @@ static OpFoldResult getBoundedTileSize(OpBuilder &b, Location loc,
217217
AffineExpr s0, s1, d0;
218218
bindDims(b.getContext(), d0);
219219
bindSymbols(b.getContext(), s0, s1);
220-
AffineMap minMap = AffineMap::get(1, 2, {s0, s1 - d0}, b.getContext());
220+
AffineMap minMap = AffineMap::get(1, 2, {s0 - d0, s1}, b.getContext());
221221
Value size = getValueOrCreateConstantIndexOp(b, loc, loopRange.size);
222222
return affine::makeComposedFoldedAffineMin(
223-
b, loc, minMap, SmallVector<OpFoldResult>{offset, tileSize, size});
223+
b, loc, minMap, SmallVector<OpFoldResult>{offset, size, tileSize});
224224
}
225225

226226
/// Returns true if the maximum tile offset `tileSize * numThreads-1` is less

mlir/lib/Dialect/SCF/Utils/Utils.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,3 +1164,37 @@ scf::ForOp mlir::fuseIndependentSiblingForLoops(scf::ForOp target,
11641164

11651165
return fusedLoop;
11661166
}
1167+
1168+
FailureOr<scf::ForallOp> mlir::normalizeForallOp(RewriterBase &rewriter,
1169+
scf::ForallOp forallOp) {
1170+
SmallVector<OpFoldResult> lbs = forallOp.getMixedLowerBound();
1171+
SmallVector<OpFoldResult> ubs = forallOp.getMixedUpperBound();
1172+
SmallVector<OpFoldResult> steps = forallOp.getMixedStep();
1173+
1174+
if (llvm::all_of(
1175+
lbs, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 0); }) &&
1176+
llvm::all_of(
1177+
steps, [](OpFoldResult ofr) { return isConstantIntValue(ofr, 1); })) {
1178+
return forallOp;
1179+
}
1180+
1181+
SmallVector<OpFoldResult> newLbs, newUbs, newSteps;
1182+
for (auto [lb, ub, step] : llvm::zip_equal(lbs, ubs, steps)) {
1183+
LoopParams normalizedLoopParams =
1184+
emitNormalizedLoopBounds(rewriter, forallOp.getLoc(), lb, ub, step);
1185+
newLbs.push_back(normalizedLoopParams.lowerBound);
1186+
newUbs.push_back(normalizedLoopParams.upperBound);
1187+
newSteps.push_back(normalizedLoopParams.step);
1188+
}
1189+
1190+
auto normalizedForallOp = rewriter.create<scf::ForallOp>(
1191+
forallOp.getLoc(), newLbs, newUbs, newSteps, forallOp.getOutputs(),
1192+
forallOp.getMapping(), [](OpBuilder &, Location, ValueRange) {});
1193+
1194+
rewriter.inlineRegionBefore(forallOp.getBodyRegion(),
1195+
normalizedForallOp.getBodyRegion(),
1196+
normalizedForallOp.getBodyRegion().begin());
1197+
1198+
rewriter.replaceAllOpUsesWith(forallOp, normalizedForallOp);
1199+
return normalizedForallOp;
1200+
}

mlir/test/Dialect/Linalg/tile-tensors.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ module attributes {transform.with_named_sequence} {
119119

120120
// -----
121121

122-
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (2, -d0 + s0)>
122+
// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0)[s0] -> (-d0 + s0, 2)>
123123

124124
// CHECK: fold_extract_slice
125125
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<?x128xf32>

mlir/test/Dialect/Linalg/tile-to-forall.mlir

Lines changed: 31 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,10 @@ func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C
196196
// CHECK: %[[NT0:.+]] = affine.apply #[[$map0]]()[%[[M]]]
197197
// CHECK: %[[NT1:.+]] = affine.apply #[[$map1]]()[%[[N]]]
198198
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]])
199-
// CHECK: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
200-
// CHECK: %[[TS1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
201-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
202-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
199+
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
200+
// CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
201+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
202+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
203203
// CHECK: tensor.extract_slice %[[A]]
204204
// CHECK: tensor.extract_slice %[[B]]
205205
// CHECK: tensor.extract_slice %[[C_BLK]]
@@ -233,11 +233,11 @@ module attributes {transform.with_named_sequence} {
233233
// CHECK-SAME: %[[C:[0-9a-z]+]]: tensor
234234
func.func @matmul_tile_size_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf32>, %C: tensor<100x300xf32>) -> tensor<100x300xf32> {
235235
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (10, 15) shared_outs(%[[C_BLK:.*]] = %[[C]])
236-
// CHECK: %[[TS:.+]] = affine.min #[[$map0]](%[[IV1]])
236+
// CHECK-DAG: %[[TS:.+]] = affine.min #[[$map0]](%[[IV1]])
237+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
238+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
237239
// CHECK-NOT: affine.max
238240
// CHECK-NOT: affine.min
239-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
240-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
241241
// CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] :
242242
// CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] :
243243
// CHECK: %[[tC:.+]] = tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] :
@@ -452,10 +452,9 @@ module attributes {transform.with_named_sequence} {
452452
// CHECK-DAG: #[[$map0:.+]] = affine_map<()[s0] -> (s0 ceildiv 10)>
453453
// CHECK-DAG: #[[$map1:.+]] = affine_map<()[s0] -> (s0 ceildiv 20)>
454454
// CHECK-DAG: #[[$map2:.+]] = affine_map<(d0)[s0] -> (d0 * -10 + s0, 10)>
455-
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0) -> (0, d0)>
456-
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
457-
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 10)>
458-
// CHECK-DAG: #[[$map6:.+]] = affine_map<(d0) -> (d0 * 20)>
455+
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
456+
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0) -> (d0 * 10)>
457+
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 20)>
459458

460459
// CHECK-LABEL: matmul_tile_size_dynamic(
461460
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<?x?xf32>
@@ -464,18 +463,16 @@ module attributes {transform.with_named_sequence} {
464463
func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>) -> tensor<?x?xf32> {
465464
// CHECK: %[[c1:.*]] = arith.constant 1 : index
466465
// CHECK: %[[c0:.*]] = arith.constant 0 : index
467-
// CHECK: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
468-
// CHECK: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
469-
// CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
470-
// CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
471-
// CHECK: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
466+
// CHECK-DAG: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
467+
// CHECK-DAG: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
468+
// CHECK-DAG: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
469+
// CHECK-DAG: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
470+
// CHECK-DAG: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
472471
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]])
473-
// CHECK: %[[TSMIN0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
474-
// CHECK: %[[TS0:.+]] = affine.max #[[$map3]](%[[TSMIN0]])
475-
// CHECK: %[[TSMIN1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
476-
// CHECK: %[[TS1:.+]] = affine.max #[[$map3]](%[[TSMIN1]])
477-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
478-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
472+
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
473+
// CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]]
474+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]])
475+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]])
479476
// CHECK: tensor.extract_slice %[[A]][%[[LB0]], 0] [%[[TS0]], %[[K]]] [1, 1] :
480477
// CHECK: tensor.extract_slice %[[B]][0, %[[LB1]]] [%[[K]], %[[TS1]]] [1, 1] :
481478
// CHECK: tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [%[[TS0]], %[[TS1]]] [1, 1] :
@@ -523,10 +520,9 @@ module attributes {transform.with_named_sequence} {
523520
// CHECK-DAG: #[[$map0:.+]] = affine_map<()[s0] -> (s0 ceildiv 10)>
524521
// CHECK-DAG: #[[$map1:.+]] = affine_map<()[s0] -> (s0 ceildiv 20)>
525522
// CHECK-DAG: #[[$map2:.+]] = affine_map<(d0)[s0] -> (d0 * -10 + s0, 10)>
526-
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0) -> (0, d0)>
527-
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
528-
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 10)>
529-
// CHECK-DAG: #[[$map6:.+]] = affine_map<(d0) -> (d0 * 20)>
523+
// CHECK-DAG: #[[$map3:.+]] = affine_map<(d0)[s0] -> (d0 * -20 + s0, 20)>
524+
// CHECK-DAG: #[[$map4:.+]] = affine_map<(d0) -> (d0 * 10)>
525+
// CHECK-DAG: #[[$map5:.+]] = affine_map<(d0) -> (d0 * 20)>
530526

531527
// CHECK-LABEL: matmul_tile_size_dynamic(
532528
// CHECK-SAME: %[[A:[0-9a-z]+]]: tensor<?x?xf32>
@@ -535,18 +531,16 @@ module attributes {transform.with_named_sequence} {
535531
func.func @matmul_tile_size_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>, %C: tensor<?x?xf32>) -> tensor<?x?xf32> {
536532
// CHECK: %[[c1:.*]] = arith.constant 1 : index
537533
// CHECK: %[[c0:.*]] = arith.constant 0 : index
538-
// CHECK: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
539-
// CHECK: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
540-
// CHECK: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
541-
// CHECK: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
542-
// CHECK: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
534+
// CHECK-DAG: %[[M:.+]] = tensor.dim %[[A]], %[[c0]] :
535+
// CHECK-DAG: %[[N:.+]] = tensor.dim %[[B]], %[[c1]] :
536+
// CHECK-DAG: %[[NT0:.+]] = affine.apply #map()[%[[M]]]
537+
// CHECK-DAG: %[[NT1:.+]] = affine.apply #map1()[%[[N]]]
538+
// CHECK-DAG: %[[K:.+]] = tensor.dim %[[A]], %[[c1]] :
543539
// CHECK: scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (%[[NT0]], %[[NT1]]) shared_outs(%[[C_BLK:.*]] = %[[C]])
544-
// CHECK: %[[TSMIN0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
545-
// CHECK: %[[TS0:.+]] = affine.max #[[$map3]](%[[TSMIN0]])
546-
// CHECK: %[[TSMIN1:.+]] = affine.min #[[$map4]](%[[IV1]])[%[[N]]]
547-
// CHECK: %[[TS1:.+]] = affine.max #[[$map3]](%[[TSMIN1]])
548-
// CHECK: %[[LB0:.+]] = affine.apply #[[$map5]](%[[IV0]])
549-
// CHECK: %[[LB1:.+]] = affine.apply #[[$map6]](%[[IV1]])
540+
// CHECK-DAG: %[[TS0:.+]] = affine.min #[[$map2]](%[[IV0]])[%[[M]]]
541+
// CHECK-DAG: %[[TS1:.+]] = affine.min #[[$map3]](%[[IV1]])[%[[N]]]
542+
// CHECK-DAG: %[[LB0:.+]] = affine.apply #[[$map4]](%[[IV0]])
543+
// CHECK-DAG: %[[LB1:.+]] = affine.apply #[[$map5]](%[[IV1]])
550544
// CHECK: tensor.extract_slice %[[A]][%[[LB0]], 0] [%[[TS0]], %[[K]]] [1, 1] :
551545
// CHECK: tensor.extract_slice %[[B]][0, %[[LB1]]] [%[[K]], %[[TS1]]] [1, 1] :
552546
// CHECK: tensor.extract_slice %[[C_BLK]][%[[LB0]], %[[LB1]]] [%[[TS0]], %[[TS1]]] [1, 1] :

mlir/test/Dialect/Linalg/transform-op-tile.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ module {
184184
// CHECK: %[[VS:.*]] = vector.vscale
185185
// CHECK: %[[STEP:.*]] = arith.muli %[[VEC_SIZE]], %[[VS]] : index
186186
// CHECK: scf.for %[[IV:.*]] = %[[C0]] to %[[DIM]] step %[[STEP]] iter_args(%[[VAL:.*]] = %[[ARG_2]]) -> (tensor<?xf32>) {
187-
// CHECK: %[[SIZE:.*]] = affine.min affine_map<(d0)[s0, s1] -> (s0, -d0 + s1)>(%[[IV]])[%[[STEP]], %[[DIM]]]
187+
// CHECK: %[[SIZE:.*]] = affine.min affine_map<(d0)[s0, s1] -> (-d0 + s0, s1)>(%[[IV]])[%[[DIM]], %[[STEP]]]
188188
// CHECK: %[[SLICE_ARG0:.*]] = tensor.extract_slice %[[ARG_0]][%[[IV]]] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>
189189
// CHECK: %[[SLICE_ARG1:.*]] = tensor.extract_slice %[[ARG_1]][%[[IV]]] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>
190190
// CHECK: %[[SLICE_ARG2:.*]] = tensor.extract_slice %[[VAL]][%[[IV]]] [%[[SIZE]]] [1] : tensor<?xf32> to tensor<?xf32>

mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,7 @@ module attributes {transform.with_named_sequence} {
428428
transform.yield
429429
}
430430
}
431-
// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
431+
// CHECK: #[[MAP:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)>
432432
// CHECK: func @matmul_sequence_fusion(
433433
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<?x?xf32>
434434
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: tensor<?x?xf32>

0 commit comments

Comments
 (0)