Skip to content

Commit 8f77d37

Browse files
authored
[DAGCombiner] cache negative result from getMergeStoreCandidates() (#106949)
Cache negative search result from getStoreMergeCandidates() so that mergeConsecutiveStores() does not iterate quadratically over a potentially long sequence of unmergeable stores.
1 parent e64ef63 commit 8f77d37

File tree

1 file changed

+51
-32
lines changed

1 file changed

+51
-32
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 51 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,11 @@ namespace {
191191
// AA - Used for DAG load/store alias analysis.
192192
AliasAnalysis *AA;
193193

194+
/// This caches all chains that have already been processed in
195+
/// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
196+
/// stores candidates.
197+
SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
198+
194199
/// When an instruction is simplified, add all users of the instruction to
195200
/// the work lists because they might get more simplified now.
196201
void AddUsersToWorklist(SDNode *N) {
@@ -779,11 +784,10 @@ namespace {
779784
bool UseTrunc);
780785

781786
/// This is a helper function for mergeConsecutiveStores. Stores that
782-
/// potentially may be merged with St are placed in StoreNodes. RootNode is
783-
/// a chain predecessor to all store candidates.
784-
void getStoreMergeCandidates(StoreSDNode *St,
785-
SmallVectorImpl<MemOpLink> &StoreNodes,
786-
SDNode *&Root);
787+
/// potentially may be merged with St are placed in StoreNodes. On success,
788+
/// returns a chain predecessor to all store candidates.
789+
SDNode *getStoreMergeCandidates(StoreSDNode *St,
790+
SmallVectorImpl<MemOpLink> &StoreNodes);
787791

788792
/// Helper function for mergeConsecutiveStores. Checks if candidate stores
789793
/// have indirect dependency through their operands. RootNode is the
@@ -1785,6 +1789,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
17851789

17861790
++NodesCombined;
17871791

1792+
// Invalidate cached info.
1793+
ChainsWithoutMergeableStores.clear();
1794+
17881795
// If we get back the same node we passed in, rather than a new node or
17891796
// zero, we know that the node must have defined multiple values and
17901797
// CombineTo was used. Since CombineTo takes care of the worklist
@@ -20514,15 +20521,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
2051420521
return true;
2051520522
}
2051620523

20517-
void DAGCombiner::getStoreMergeCandidates(
20518-
StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20519-
SDNode *&RootNode) {
20524+
SDNode *
20525+
DAGCombiner::getStoreMergeCandidates(StoreSDNode *St,
20526+
SmallVectorImpl<MemOpLink> &StoreNodes) {
2052020527
// This holds the base pointer, index, and the offset in bytes from the base
2052120528
// pointer. We must have a base and an offset. Do not handle stores to undef
2052220529
// base pointers.
2052320530
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
2052420531
if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20525-
return;
20532+
return nullptr;
2052620533

2052720534
SDValue Val = peekThroughBitcasts(St->getValue());
2052820535
StoreSource StoreSrc = getStoreSource(Val);
@@ -20538,14 +20545,14 @@ void DAGCombiner::getStoreMergeCandidates(
2053820545
LoadVT = Ld->getMemoryVT();
2053920546
// Load and store should be the same type.
2054020547
if (MemVT != LoadVT)
20541-
return;
20548+
return nullptr;
2054220549
// Loads must only have one use.
2054320550
if (!Ld->hasNUsesOfValue(1, 0))
20544-
return;
20551+
return nullptr;
2054520552
// The memory operands must not be volatile/indexed/atomic.
2054620553
// TODO: May be able to relax for unordered atomics (see D66309)
2054720554
if (!Ld->isSimple() || Ld->isIndexed())
20548-
return;
20555+
return nullptr;
2054920556
}
2055020557
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
2055120558
int64_t &Offset) -> bool {
@@ -20613,6 +20620,27 @@ void DAGCombiner::getStoreMergeCandidates(
2061320620
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
2061420621
};
2061520622

20623+
// We are looking for a root node which is an ancestor to all mergable
20624+
// stores. We search up through a load, to our root and then down
20625+
// through all children. For instance we will find Store{1,2,3} if
20626+
// St is Store1, Store2. or Store3 where the root is not a load
20627+
// which always true for nonvolatile ops. TODO: Expand
20628+
// the search to find all valid candidates through multiple layers of loads.
20629+
//
20630+
// Root
20631+
// |-------|-------|
20632+
// Load Load Store3
20633+
// | |
20634+
// Store1 Store2
20635+
//
20636+
// FIXME: We should be able to climb and
20637+
// descend TokenFactors to find candidates as well.
20638+
20639+
SDNode *RootNode = St->getChain().getNode();
20640+
// Bail out if we already analyzed this root node and found nothing.
20641+
if (ChainsWithoutMergeableStores.contains(RootNode))
20642+
return nullptr;
20643+
2061620644
// Check if the pair of StoreNode and the RootNode already bail out many
2061720645
// times which is over the limit in dependence check.
2061820646
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
@@ -20636,28 +20664,13 @@ void DAGCombiner::getStoreMergeCandidates(
2063620664
}
2063720665
};
2063820666

20639-
// We looking for a root node which is an ancestor to all mergable
20640-
// stores. We search up through a load, to our root and then down
20641-
// through all children. For instance we will find Store{1,2,3} if
20642-
// St is Store1, Store2. or Store3 where the root is not a load
20643-
// which always true for nonvolatile ops. TODO: Expand
20644-
// the search to find all valid candidates through multiple layers of loads.
20645-
//
20646-
// Root
20647-
// |-------|-------|
20648-
// Load Load Store3
20649-
// | |
20650-
// Store1 Store2
20651-
//
20652-
// FIXME: We should be able to climb and
20653-
// descend TokenFactors to find candidates as well.
20654-
20655-
RootNode = St->getChain().getNode();
20656-
2065720667
unsigned NumNodesExplored = 0;
2065820668
const unsigned MaxSearchNodes = 1024;
2065920669
if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
2066020670
RootNode = Ldn->getChain().getNode();
20671+
// Bail out if we already analyzed this root node and found nothing.
20672+
if (ChainsWithoutMergeableStores.contains(RootNode))
20673+
return nullptr;
2066120674
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
2066220675
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
2066320676
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
@@ -20674,6 +20687,8 @@ void DAGCombiner::getStoreMergeCandidates(
2067420687
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
2067520688
TryToAddCandidate(I);
2067620689
}
20690+
20691+
return RootNode;
2067720692
}
2067820693

2067920694
// We need to check that merging these stores does not cause a loop in the
@@ -21304,9 +21319,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
2130421319
return false;
2130521320

2130621321
SmallVector<MemOpLink, 8> StoreNodes;
21307-
SDNode *RootNode;
2130821322
// Find potential store merge candidates by searching through chain sub-DAG
21309-
getStoreMergeCandidates(St, StoreNodes, RootNode);
21323+
SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
2131021324

2131121325
// Check if there is anything to merge.
2131221326
if (StoreNodes.size() < 2)
@@ -21362,6 +21376,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
2136221376
llvm_unreachable("Unhandled store source type");
2136321377
}
2136421378
}
21379+
21380+
// Remember if we failed to optimize, to save compile time.
21381+
if (!MadeChange)
21382+
ChainsWithoutMergeableStores.insert(RootNode);
21383+
2136521384
return MadeChange;
2136621385
}
2136721386

0 commit comments

Comments
 (0)