Skip to content

Reland: [MLIR][Transforms] Fix Mem2Reg removal order to respect dominance #68877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,6 @@ namespace detail {
/// to the results of preceding blocks.
void connectPHINodes(Region &region, const ModuleTranslation &state);

/// Get a topologically sorted list of blocks of the given region.
SetVector<Block *> getTopologicallySortedBlocks(Region &region);

/// Create an LLVM IR constant of `llvmType` from the MLIR attribute `attr`.
/// This currently supports integer, floating point, splat and dense element
/// attributes and combinations thereof. Also, an array attribute with two
Expand Down
3 changes: 3 additions & 0 deletions mlir/include/mlir/Transforms/RegionUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ LogicalResult eraseUnreachableBlocks(RewriterBase &rewriter,
LogicalResult runRegionDCE(RewriterBase &rewriter,
MutableArrayRef<Region> regions);

/// Get a topologically sorted list of blocks of the given region.
SetVector<Block *> getTopologicallySortedBlocks(Region &region);

} // namespace mlir

#endif // MLIR_TRANSFORMS_REGIONUTILS_H_
1 change: 1 addition & 0 deletions mlir/lib/Target/LLVMIR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ add_mlir_translation_library(MLIRTargetLLVMIRExport
MLIRLLVMDialect
MLIRLLVMIRTransforms
MLIRTranslateLib
MLIRTransformUtils
)

add_mlir_translation_library(MLIRToLLVMIRTranslationRegistration
Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Target/LLVMIR/Dialect/OpenACC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ add_mlir_translation_library(MLIROpenACCToLLVMIRTranslation
MLIROpenACCDialect
MLIRSupport
MLIRTargetLLVMIRExport
MLIRTransformUtils
)
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "mlir/Support/LLVM.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h"
#include "mlir/Target/LLVMIR/ModuleTranslation.h"
#include "mlir/Transforms/RegionUtils.h"

#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
Expand Down Expand Up @@ -395,8 +396,7 @@ static LogicalResult convertDataOp(acc::DataOp &op,
llvm::BasicBlock *endDataBlock = llvm::BasicBlock::Create(
ctx, "acc.end_data", builder.GetInsertBlock()->getParent());

SetVector<Block *> blocks =
LLVM::detail::getTopologicallySortedBlocks(op.getRegion());
SetVector<Block *> blocks = getTopologicallySortedBlocks(op.getRegion());
for (Block *bb : blocks) {
llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
if (bb->isEntryBlock()) {
Expand Down
1 change: 1 addition & 0 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ add_mlir_translation_library(MLIROpenMPToLLVMIRTranslation
MLIROpenMPDialect
MLIRSupport
MLIRTargetLLVMIRExport
MLIRTransformUtils
)
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,7 @@ static llvm::BasicBlock *convertOmpOpRegions(

// Convert blocks one by one in topological order to ensure
// defs are converted before uses.
SetVector<Block *> blocks =
LLVM::detail::getTopologicallySortedBlocks(region);
SetVector<Block *> blocks = getTopologicallySortedBlocks(region);
for (Block *bb : blocks) {
llvm::BasicBlock *llvmBB = moduleTranslation.lookupBlock(bb);
// Retarget the branch of the entry block to the entry block of the
Expand Down
21 changes: 2 additions & 19 deletions mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "mlir/Support/LogicalResult.h"
#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h"
#include "mlir/Target/LLVMIR/TypeToLLVM.h"
#include "mlir/Transforms/RegionUtils.h"

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
Expand Down Expand Up @@ -571,24 +572,6 @@ void mlir::LLVM::detail::connectPHINodes(Region &region,
}
}

/// Returns the blocks of `region` arranged in a topological order.
SetVector<Block *>
mlir::LLVM::detail::getTopologicallySortedBlocks(Region &region) {
  // Walk the blocks of the region in order. Every block that has not been
  // collected yet seeds a reverse post-order traversal, and all blocks that
  // traversal yields are appended to the result.
  SetVector<Block *> sortedBlocks;
  for (Block &seed : region) {
    if (sortedBlocks.count(&seed) != 0)
      continue;
    llvm::ReversePostOrderTraversal<Block *> rpo(&seed);
    sortedBlocks.insert(rpo.begin(), rpo.end());
  }
  assert(sortedBlocks.size() == region.getBlocks().size() &&
         "some blocks are not sorted");

  return sortedBlocks;
}

llvm::CallInst *mlir::LLVM::detail::createIntrinsicCall(
llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic,
ArrayRef<llvm::Value *> args, ArrayRef<llvm::Type *> tys) {
Expand Down Expand Up @@ -922,7 +905,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {

// Then, convert blocks one by one in topological order to ensure defs are
// converted before uses.
auto blocks = detail::getTopologicallySortedBlocks(func.getBody());
auto blocks = getTopologicallySortedBlocks(func.getBody());
for (Block *bb : blocks) {
llvm::IRBuilder<> builder(llvmContext);
if (failed(convertBlock(*bb, bb->isEntryBlock(), builder)))
Expand Down
50 changes: 38 additions & 12 deletions mlir/lib/Transforms/Mem2Reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include "mlir/Interfaces/MemorySlotInterfaces.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/RegionUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/GenericIteratedDominanceFrontier.h"
Expand Down Expand Up @@ -96,6 +98,9 @@ using namespace mlir;

namespace {

using BlockingUsesMap =
llvm::MapVector<Operation *, SmallPtrSet<OpOperand *, 4>>;

/// Information computed during promotion analysis used to perform actual
/// promotion.
struct MemorySlotPromotionInfo {
Expand All @@ -106,7 +111,7 @@ struct MemorySlotPromotionInfo {
/// its uses, it is because the defining ops of the blocking uses requested
/// it. The defining ops therefore must also have blocking uses or be the
/// starting point of the blocking uses.
DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> userToBlockingUses;
BlockingUsesMap userToBlockingUses;
};

/// Computes information for basic slot promotion. This will check that direct
Expand All @@ -129,8 +134,7 @@ class MemorySlotPromotionAnalyzer {
/// uses (typically, removing its users because it will delete itself to
/// resolve its own blocking uses). This will fail if one of the transitive
/// users cannot remove a requested use, and should prevent promotion.
LogicalResult computeBlockingUses(
DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> &userToBlockingUses);
LogicalResult computeBlockingUses(BlockingUsesMap &userToBlockingUses);

/// Computes in which blocks the value stored in the slot is actually used,
/// meaning blocks leading to a load. This method uses `definingBlocks`, the
Expand Down Expand Up @@ -233,7 +237,7 @@ Value MemorySlotPromoter::getLazyDefaultValue() {
}

LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
DenseMap<Operation *, SmallPtrSet<OpOperand *, 4>> &userToBlockingUses) {
BlockingUsesMap &userToBlockingUses) {
// The promotion of an operation may require the promotion of further
// operations (typically, removing operations that use an operation that must
// delete itself). We thus need to start from the use of the slot pointer and
Expand All @@ -243,7 +247,7 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
// use it.
for (OpOperand &use : slot.ptr.getUses()) {
SmallPtrSet<OpOperand *, 4> &blockingUses =
userToBlockingUses.getOrInsertDefault(use.getOwner());
userToBlockingUses[use.getOwner()];
blockingUses.insert(&use);
}

Expand Down Expand Up @@ -281,7 +285,7 @@ LogicalResult MemorySlotPromotionAnalyzer::computeBlockingUses(
assert(llvm::is_contained(user->getResults(), blockingUse->get()));

SmallPtrSetImpl<OpOperand *> &newUserBlockingUseSet =
userToBlockingUses.getOrInsertDefault(blockingUse->getOwner());
userToBlockingUses[blockingUse->getOwner()];
newUserBlockingUseSet.insert(blockingUse);
}
}
Expand Down Expand Up @@ -515,15 +519,37 @@ void MemorySlotPromoter::computeReachingDefInRegion(Region *region,
}
}

/// Reorders `ops` in place so that they follow a dominance respecting order.
/// Blocks are ranked by their position in the topological block order of
/// `region`, which keeps the resulting order deterministic.
static void dominanceSort(SmallVector<Operation *> &ops, Region &region) {
  // Record, for every block, its position in the topological block order.
  SetVector<Block *> orderedBlocks = getTopologicallySortedBlocks(region);
  DenseMap<Block *, size_t> blockRank;
  size_t nextRank = 0;
  for (Block *block : orderedBlocks)
    blockRank[block] = nextRank++;

  // Compare by block rank first; operations that live in the same block are
  // ordered by their relative position inside that block.
  auto precedes = [&](Operation *lhs, Operation *rhs) {
    size_t lhsRank = blockRank.at(lhs->getBlock());
    size_t rhsRank = blockRank.at(rhs->getBlock());
    if (lhsRank != rhsRank)
      return lhsRank < rhsRank;
    return lhs->isBeforeInBlock(rhs);
  };
  llvm::sort(ops, precedes);
}

void MemorySlotPromoter::removeBlockingUses() {
llvm::SetVector<Operation *> usersToRemoveUses;
for (auto &user : llvm::make_first_range(info.userToBlockingUses))
usersToRemoveUses.insert(user);
SetVector<Operation *> sortedUsersToRemoveUses =
mlir::topologicalSort(usersToRemoveUses);
llvm::SmallVector<Operation *> usersToRemoveUses(
llvm::make_first_range(info.userToBlockingUses));

// Sort according to dominance.
dominanceSort(usersToRemoveUses, *slot.ptr.getParentBlock()->getParent());

llvm::SmallVector<Operation *> toErase;
for (Operation *toPromote : llvm::reverse(sortedUsersToRemoveUses)) {
for (Operation *toPromote : llvm::reverse(usersToRemoveUses)) {
if (auto toPromoteMemOp = dyn_cast<PromotableMemOpInterface>(toPromote)) {
Value reachingDef = reachingDefs.lookup(toPromoteMemOp);
// If no reaching definition is known, this use is outside the reach of
Expand Down
16 changes: 16 additions & 0 deletions mlir/lib/Transforms/Utils/RegionUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -836,3 +836,19 @@ LogicalResult mlir::simplifyRegions(RewriterBase &rewriter,
return success(eliminatedBlocks || eliminatedOpsOrArgs ||
mergedIdenticalBlocks);
}

SetVector<Block *> mlir::getTopologicallySortedBlocks(Region &region) {
  // Walk the blocks of the region in order. Every block that has not been
  // collected yet seeds a reverse post-order traversal, and all blocks that
  // traversal yields are appended to the result.
  SetVector<Block *> sortedBlocks;
  for (Block &seed : region) {
    if (sortedBlocks.count(&seed) != 0)
      continue;
    llvm::ReversePostOrderTraversal<Block *> rpo(&seed);
    sortedBlocks.insert(rpo.begin(), rpo.end());
  }
  assert(sortedBlocks.size() == region.getBlocks().size() &&
         "some blocks are not sorted");

  return sortedBlocks;
}
13 changes: 13 additions & 0 deletions mlir/test/Dialect/LLVMIR/mem2reg.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -683,3 +683,16 @@ llvm.func @no_inner_alloca_promotion(%arg: i64) -> i64 {
// CHECK: llvm.return %[[RES]] : i64
llvm.return %2 : i64
}

// -----

// Verifies that the alloca is promoted away when the value stored into the
// slot is itself the result of an earlier load from that same slot.
// CHECK-LABEL: @transitive_reaching_def
llvm.func @transitive_reaching_def() -> !llvm.ptr {
%0 = llvm.mlir.constant(1 : i32) : i32
// CHECK-NOT: alloca
%1 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr
%2 = llvm.load %1 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
llvm.store %2, %1 {alignment = 8 : i64} : !llvm.ptr, !llvm.ptr
%3 = llvm.load %1 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr
llvm.return %3 : !llvm.ptr
}