@@ -191,6 +191,11 @@ namespace {
191
191
// AA - Used for DAG load/store alias analysis.
192
192
AliasAnalysis *AA;
193
193
194
+ /// This caches all chains that have already been processed in
195
+ /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable
196
+ /// store candidates.
197
+ SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores;
198
+
194
199
/// When an instruction is simplified, add all users of the instruction to
195
200
/// the work lists because they might get more simplified now.
196
201
void AddUsersToWorklist(SDNode *N) {
@@ -779,11 +784,10 @@ namespace {
779
784
bool UseTrunc);
780
785
781
786
/// This is a helper function for mergeConsecutiveStores. Stores that
782
- /// potentially may be merged with St are placed in StoreNodes. RootNode is
783
- /// a chain predecessor to all store candidates.
784
- void getStoreMergeCandidates(StoreSDNode *St,
785
- SmallVectorImpl<MemOpLink> &StoreNodes,
786
- SDNode *&Root);
787
+ /// potentially may be merged with St are placed in StoreNodes. On success,
788
+ /// returns a chain predecessor to all store candidates.
789
+ SDNode *getStoreMergeCandidates(StoreSDNode *St,
790
+ SmallVectorImpl<MemOpLink> &StoreNodes);
787
791
788
792
/// Helper function for mergeConsecutiveStores. Checks if candidate stores
789
793
/// have indirect dependency through their operands. RootNode is the
@@ -1785,6 +1789,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
1785
1789
1786
1790
++NodesCombined;
1787
1791
1792
+ // Invalidate cached info.
1793
+ ChainsWithoutMergeableStores.clear();
1794
+
1788
1795
// If we get back the same node we passed in, rather than a new node or
1789
1796
// zero, we know that the node must have defined multiple values and
1790
1797
// CombineTo was used. Since CombineTo takes care of the worklist
@@ -20514,15 +20521,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
20514
20521
return true;
20515
20522
}
20516
20523
20517
- void DAGCombiner::getStoreMergeCandidates(
20518
- StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes ,
20519
- SDNode *&RootNode ) {
20524
+ SDNode *
20525
+ DAGCombiner::getStoreMergeCandidates( StoreSDNode *St,
20526
+ SmallVectorImpl<MemOpLink> &StoreNodes ) {
20520
20527
// This holds the base pointer, index, and the offset in bytes from the base
20521
20528
// pointer. We must have a base and an offset. Do not handle stores to undef
20522
20529
// base pointers.
20523
20530
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20524
20531
if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20525
- return;
20532
+ return nullptr ;
20526
20533
20527
20534
SDValue Val = peekThroughBitcasts(St->getValue());
20528
20535
StoreSource StoreSrc = getStoreSource(Val);
@@ -20538,14 +20545,14 @@ void DAGCombiner::getStoreMergeCandidates(
20538
20545
LoadVT = Ld->getMemoryVT();
20539
20546
// Load and store should be the same type.
20540
20547
if (MemVT != LoadVT)
20541
- return;
20548
+ return nullptr ;
20542
20549
// Loads must only have one use.
20543
20550
if (!Ld->hasNUsesOfValue(1, 0))
20544
- return;
20551
+ return nullptr ;
20545
20552
// The memory operands must not be volatile/indexed/atomic.
20546
20553
// TODO: May be able to relax for unordered atomics (see D66309)
20547
20554
if (!Ld->isSimple() || Ld->isIndexed())
20548
- return;
20555
+ return nullptr ;
20549
20556
}
20550
20557
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20551
20558
int64_t &Offset) -> bool {
@@ -20613,6 +20620,27 @@ void DAGCombiner::getStoreMergeCandidates(
20613
20620
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20614
20621
};
20615
20622
20623
+ // We are looking for a root node which is an ancestor to all mergeable
20624
+ // stores. We search up through a load, to our root and then down
20625
+ // through all children. For instance we will find Store{1,2,3} if
20626
+ // St is Store1, Store2, or Store3 where the root is not a load
20627
+ // which is always true for nonvolatile ops. TODO: Expand
20628
+ // the search to find all valid candidates through multiple layers of loads.
20629
+ //
20630
+ // Root
20631
+ // |-------|-------|
20632
+ // Load Load Store3
20633
+ // | |
20634
+ // Store1 Store2
20635
+ //
20636
+ // FIXME: We should be able to climb and
20637
+ // descend TokenFactors to find candidates as well.
20638
+
20639
+ SDNode *RootNode = St->getChain().getNode();
20640
+ // Bail out if we already analyzed this root node and found nothing.
20641
+ if (ChainsWithoutMergeableStores.contains(RootNode))
20642
+ return nullptr;
20643
+
20616
20644
// Check if the pair of StoreNode and the RootNode already bail out many
20617
20645
// times which is over the limit in dependence check.
20618
20646
auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
@@ -20636,28 +20664,13 @@ void DAGCombiner::getStoreMergeCandidates(
20636
20664
}
20637
20665
};
20638
20666
20639
- // We looking for a root node which is an ancestor to all mergable
20640
- // stores. We search up through a load, to our root and then down
20641
- // through all children. For instance we will find Store{1,2,3} if
20642
- // St is Store1, Store2. or Store3 where the root is not a load
20643
- // which always true for nonvolatile ops. TODO: Expand
20644
- // the search to find all valid candidates through multiple layers of loads.
20645
- //
20646
- // Root
20647
- // |-------|-------|
20648
- // Load Load Store3
20649
- // | |
20650
- // Store1 Store2
20651
- //
20652
- // FIXME: We should be able to climb and
20653
- // descend TokenFactors to find candidates as well.
20654
-
20655
- RootNode = St->getChain().getNode();
20656
-
20657
20667
unsigned NumNodesExplored = 0;
20658
20668
const unsigned MaxSearchNodes = 1024;
20659
20669
if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20660
20670
RootNode = Ldn->getChain().getNode();
20671
+ // Bail out if we already analyzed this root node and found nothing.
20672
+ if (ChainsWithoutMergeableStores.contains(RootNode))
20673
+ return nullptr;
20661
20674
for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20662
20675
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20663
20676
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
@@ -20674,6 +20687,8 @@ void DAGCombiner::getStoreMergeCandidates(
20674
20687
I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20675
20688
TryToAddCandidate(I);
20676
20689
}
20690
+
20691
+ return RootNode;
20677
20692
}
20678
20693
20679
20694
// We need to check that merging these stores does not cause a loop in the
@@ -21304,9 +21319,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21304
21319
return false;
21305
21320
21306
21321
SmallVector<MemOpLink, 8> StoreNodes;
21307
- SDNode *RootNode;
21308
21322
// Find potential store merge candidates by searching through chain sub-DAG
21309
- getStoreMergeCandidates(St, StoreNodes, RootNode );
21323
+ SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes);
21310
21324
21311
21325
// Check if there is anything to merge.
21312
21326
if (StoreNodes.size() < 2)
@@ -21362,6 +21376,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
21362
21376
llvm_unreachable("Unhandled store source type");
21363
21377
}
21364
21378
}
21379
+
21380
+ // Remember if we failed to optimize, to save compile time.
21381
+ if (!MadeChange)
21382
+ ChainsWithoutMergeableStores.insert(RootNode);
21383
+
21365
21384
return MadeChange;
21366
21385
}
21367
21386
0 commit comments