-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[mlir][affine] Use value bound inference to determine minimum/maximum trip counts in loop analysis #128113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[mlir][affine] Use value bound inference to determine minimum/maximum trip counts in loop analysis #128113
Changes from 1 commit
23b3a7f
c834f4d
e865351
0b30c4e
fa68fe1
82e48ee
e31ff46
e58e115
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
#include "mlir/Dialect/Affine/IR/AffineValueMap.h" | ||
#include "mlir/Dialect/Affine/Utils.h" | ||
#include "mlir/Dialect/Func/IR/FuncOps.h" | ||
#include "mlir/Dialect/GPU/IR/GPUDialect.h" | ||
#include "mlir/Dialect/MemRef/IR/MemRef.h" | ||
#include "mlir/Dialect/SCF/IR/SCF.h" | ||
#include "mlir/IR/IRMapping.h" | ||
|
@@ -113,11 +114,29 @@ static void replaceIterArgsAndYieldResults(AffineForOp forOp) { | |
std::get<0>(e).replaceAllUsesWith(std::get<1>(e)); | ||
} | ||
|
||
/// Erases `forOp` when both its constant trip count and its constant maximum
/// trip count are known and equal to zero. Every use of a loop result is
/// redirected to the corresponding init operand before the op is erased.
/// Returns failure (and leaves the IR untouched) if either count is unknown or
/// non-zero.
LogicalResult mlir::affine::removeInvalidLoop(AffineForOp forOp) {
  std::optional<uint64_t> minTrips = getConstantTripCount(forOp);
  std::optional<uint64_t> maxTrips = getMaxConstantTripCount(forOp);

  const bool provablyEmpty =
      minTrips && *minTrips == 0 && maxTrips && *maxTrips == 0;
  if (!provablyEmpty)
    return failure();

  // Forward each init operand to the users of the matching loop result so the
  // op has no remaining uses when it is erased.
  auto loopResults = forOp.getResults();
  auto initValues = forOp.getInits();
  for (auto [loopResult, initValue] : llvm::zip(loopResults, initValues))
    loopResult.replaceAllUsesWith(initValue);

  IRRewriter rewriter(forOp);
  rewriter.eraseOp(forOp);
  return success();
}
|
||
/// Promotes the loop body of a forOp to its containing block if the forOp | ||
/// was known to have a single iteration. | ||
LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) { | ||
std::optional<uint64_t> tripCount = getConstantTripCount(forOp); | ||
if (!tripCount || *tripCount != 1) | ||
std::optional<uint64_t> maxTripCount = getMaxConstantTripCount(forOp); | ||
if (!tripCount || *tripCount != 1 || !maxTripCount || *maxTripCount != 1) | ||
linuxlonelyeagle marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return failure(); | ||
linuxlonelyeagle marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// TODO: extend this for arbitrary affine bounds. | ||
|
@@ -160,7 +179,8 @@ LogicalResult mlir::affine::promoteIfSingleIteration(AffineForOp forOp) { | |
forOp.getBody()->back().erase(); | ||
parentBlock->getOperations().splice(Block::iterator(forOp), | ||
forOp.getBody()->getOperations()); | ||
forOp.erase(); | ||
IRRewriter b(forOp.getContext()); | ||
b.eraseOp(forOp); | ||
linuxlonelyeagle marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return success(); | ||
} | ||
|
||
|
@@ -884,15 +904,27 @@ void mlir::affine::getTileableBands( | |
/// Unrolls this loop completely. | ||
LogicalResult mlir::affine::loopUnrollFull(AffineForOp forOp) { | ||
std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp); | ||
if (mayBeConstantTripCount.has_value()) { | ||
uint64_t tripCount = *mayBeConstantTripCount; | ||
if (tripCount == 0) | ||
return success(); | ||
if (tripCount == 1) | ||
return promoteIfSingleIteration(forOp); | ||
return loopUnrollByFactor(forOp, tripCount); | ||
} | ||
return failure(); | ||
std::optional<uint64_t> maxMayBeConstantTripCount = | ||
getMaxConstantTripCount(forOp); | ||
|
||
if (!mayBeConstantTripCount.has_value() && | ||
!maxMayBeConstantTripCount.has_value()) | ||
return failure(); | ||
|
||
uint64_t tripCount = *mayBeConstantTripCount; | ||
uint64_t maxTripCount = *maxMayBeConstantTripCount; | ||
|
||
// The values of Trip are all 0, and the invalid loop is deleted. | ||
if (tripCount <= 0 && maxTripCount <= 0) | ||
return removeInvalidLoop(forOp); | ||
|
||
// In special cases, such as in a GPU, only some threads execute this loop. | ||
if (tripCount == 0 && maxTripCount == 1) | ||
return success(); | ||
|
||
if (tripCount == 1 && maxTripCount == 1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the trip count is known to be one, how can the max trip count be anything other than one?! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe maxTripCount will be equal to 2. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would getConstantMaxTripCount return a value different from the constant trip count when the trip count is known to be so? It shouldn't - otherwise, it's trivially loose. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are obviously talking about the CPU, which is indeed constant, but for hardware like GPU, threadId is a dynamic thing. The smallest threadid is 0, and the largest threadid is blocksize -1. The value of (upper - thread) / step is obviously not constant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please look at the comments below, I'm wondering if affine-loop-unroll is not a pattern pass causing this issue (if you have the time. I'll continue to work on it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it can run it will definitely be a huge improvement, it's really exciting. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
A lot of this confusion would be cleared up if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure it would. The semantics of affine loops is to take a minimum of values produced by each individual expression in the upper bound, it's unclear to me why we would need to reason about the upper bound. |
||
return promoteIfSingleIteration(forOp); | ||
linuxlonelyeagle marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return loopUnrollByFactor(forOp, tripCount); | ||
} | ||
|
||
/// Unrolls this loop by the specified factor or by the trip count (if constant) | ||
|
@@ -1013,8 +1045,11 @@ LogicalResult mlir::affine::loopUnrollByFactor( | |
assert(unrollFactor > 0 && "unroll factor should be positive"); | ||
|
||
std::optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp); | ||
std::optional<uint64_t> maxMayBeConstantTripCount = | ||
getMaxConstantTripCount(forOp); | ||
if (unrollFactor == 1) { | ||
if (mayBeConstantTripCount && *mayBeConstantTripCount == 1 && | ||
maxMayBeConstantTripCount && *maxMayBeConstantTripCount == 1 && | ||
failed(promoteIfSingleIteration(forOp))) | ||
return failure(); | ||
return success(); | ||
|
Uh oh!
There was an error while loading. Please reload this page.