@@ -20322,6 +20322,38 @@ static bool checkTreeSizes(ArrayRef<std::pair<unsigned, unsigned>> Sizes,
20322
20322
return Dev * 96 / (Mean * Mean) == 0;
20323
20323
}
20324
20324
20325
+ namespace {
20326
+
20327
/// A group of stores that we'll try to bundle together using vector ops.
/// The group is keyed by the signed distance between each store's address
/// operand and the address operand of the group's base instruction, so
/// iterating \c Instrs visits the stores in increasing address order.
struct RelatedStoreInsts {
  /// Maps a pointer distance from the base instruction to an instruction
  /// index. Being a std::map, it keeps its entries sorted by distance.
  using DistToInstMap = std::map<int, unsigned>;

  /// Creates a group whose only member is \p BaseInstrIdx, at distance 0.
  /// The constructor is explicit so that a plain index is never silently
  /// converted into a whole group.
  explicit RelatedStoreInsts(unsigned BaseInstrIdx) { reset(BaseInstrIdx); }

  /// Drops every recorded store and restarts the group around
  /// \p NewBaseInstr, which becomes the entry with a pointer distance of 0.
  void reset(unsigned NewBaseInstr) {
    BaseInstrIdx = NewBaseInstr;
    Instrs.clear();
    // The map was just emptied, so this insertion always succeeds and the
    // result can be ignored.
    insertOrLookup(NewBaseInstr, 0);
  }

  /// Tries to insert \p InstrIdx as the store with a pointer distance of
  /// \p PtrDist.
  /// Does nothing if there is already a store with that \p PtrDist.
  /// \returns std::nullopt if \p InstrIdx was inserted; otherwise the index
  /// of the instruction already associated with \p PtrDist (the existing
  /// entry is left untouched).
  std::optional<unsigned> insertOrLookup(unsigned InstrIdx, int PtrDist) {
    auto [It, Inserted] = Instrs.emplace(PtrDist, InstrIdx);
    if (Inserted)
      return std::nullopt;
    return It->second;
  }

  /// The index of the Base instruction, i.e. the one with a 0 pointer distance.
  unsigned BaseInstrIdx;

  /// The stores of this group, keyed by their distance to the base instruction.
  DistToInstMap Instrs;
};
20354
+
20355
+ } // end anonymous namespace
20356
+
20325
20357
bool SLPVectorizerPass::vectorizeStores(
20326
20358
ArrayRef<StoreInst *> Stores, BoUpSLP &R,
20327
20359
DenseSet<std::tuple<Value *, Value *, Value *, Value *, unsigned>>
@@ -20331,31 +20363,22 @@ bool SLPVectorizerPass::vectorizeStores(
20331
20363
BoUpSLP::ValueSet VectorizedStores;
20332
20364
bool Changed = false;
20333
20365
20334
- struct StoreDistCompare {
20335
- bool operator()(const std::pair<unsigned, int> &Op1,
20336
- const std::pair<unsigned, int> &Op2) const {
20337
- return Op1.second < Op2.second;
20338
- }
20339
- };
20340
- // A set of pairs (index of store in Stores array ref, Distance of the store
20341
- // address relative to base store address in units).
20342
- using StoreIndexToDistSet =
20343
- std::set<std::pair<unsigned, int>, StoreDistCompare>;
20344
- auto TryToVectorize = [&](const StoreIndexToDistSet &Set) {
20366
+ auto TryToVectorize = [&](const RelatedStoreInsts::DistToInstMap &StoreSeq) {
20345
20367
int PrevDist = -1;
20346
20368
BoUpSLP::ValueList Operands;
20347
20369
// Collect the chain into a list.
20348
- for (auto [Idx, Data] : enumerate(Set)) {
20349
- if (Operands.empty() || Data.second - PrevDist == 1) {
20350
- Operands.push_back(Stores[Data.first]);
20351
- PrevDist = Data.second;
20352
- if (Idx != Set.size() - 1)
20370
+ for (auto [Idx, Data] : enumerate(StoreSeq)) {
20371
+ auto &[Dist, InstIdx] = Data;
20372
+ if (Operands.empty() || Dist - PrevDist == 1) {
20373
+ Operands.push_back(Stores[InstIdx]);
20374
+ PrevDist = Dist;
20375
+ if (Idx != StoreSeq.size() - 1)
20353
20376
continue;
20354
20377
}
20355
- auto E = make_scope_exit([&, &DataVar = Data ]() {
20378
+ auto E = make_scope_exit([&, &Dist = Dist, &InstIdx = InstIdx ]() {
20356
20379
Operands.clear();
20357
- Operands.push_back(Stores[DataVar.first ]);
20358
- PrevDist = DataVar.second ;
20380
+ Operands.push_back(Stores[InstIdx ]);
20381
+ PrevDist = Dist ;
20359
20382
});
20360
20383
20361
20384
if (Operands.size() <= 1 ||
@@ -20622,7 +20645,8 @@ bool SLPVectorizerPass::vectorizeStores(
20622
20645
// Need to store the index of the very first store separately, since the set
20623
20646
// may be reordered after the insertion and the first store may be moved. This
20624
20647
// container reduces the number of calls to getPointersDiff().
20625
- SmallVector<std::pair<unsigned, StoreIndexToDistSet>> SortedStores;
20648
+ SmallVector<RelatedStoreInsts> SortedStores;
20649
+
20626
20650
// Inserts the specified store SI with the given index Idx to the set of the
20627
20651
// stores. If the store with the same distance is found already - stop
20628
20652
// insertion, try to vectorize already found stores. If some stores from this
@@ -20656,56 +20680,52 @@ bool SLPVectorizerPass::vectorizeStores(
20656
20680
// dependencies and no need to waste compile time to try to vectorize them.
20657
20681
// - Try to vectorize the sequence {1, {1, 0}, {3, 2}}.
20658
20682
auto FillStoresSet = [&](unsigned Idx, StoreInst *SI) {
20659
- for (std::pair<unsigned, StoreIndexToDistSet> &Set : SortedStores) {
20683
+ for (RelatedStoreInsts &StoreSeq : SortedStores) {
20660
20684
std::optional<int> Diff = getPointersDiff(
20661
- Stores[Set.first ]->getValueOperand()->getType(),
20662
- Stores[Set.first ]->getPointerOperand(),
20685
+ Stores[StoreSeq.BaseInstrIdx ]->getValueOperand()->getType(),
20686
+ Stores[StoreSeq.BaseInstrIdx ]->getPointerOperand(),
20663
20687
SI->getValueOperand()->getType(), SI->getPointerOperand(), *DL, *SE,
20664
20688
/*StrictCheck=*/true);
20665
20689
if (!Diff)
20666
20690
continue;
20667
- auto It = Set.second.find(std::make_pair(Idx, *Diff));
20668
- if (It == Set.second.end()) {
20669
- Set.second.emplace(Idx, *Diff);
20691
+ std::optional<unsigned> PrevInst =
20692
+ StoreSeq.insertOrLookup(/*InstrIdx=*/Idx, /*PtrDist=*/*Diff);
20693
+ if (!PrevInst) {
20694
+ // No store was associated to that distance. Keep collecting.
20670
20695
return;
20671
20696
}
20672
20697
// Try to vectorize the first found set to avoid duplicate analysis.
20673
- TryToVectorize(Set.second);
20674
- unsigned ItIdx = It->first;
20675
- int ItDist = It->second;
20676
- StoreIndexToDistSet PrevSet;
20677
- copy_if(Set.second, std::inserter(PrevSet, PrevSet.end()),
20678
- [&](const std::pair<unsigned, int> &Pair) {
20679
- return Pair.first > ItIdx;
20698
+ TryToVectorize(StoreSeq.Instrs);
20699
+ RelatedStoreInsts::DistToInstMap PrevSet;
20700
+ copy_if(StoreSeq.Instrs, std::inserter(PrevSet, PrevSet.end()),
20701
+ [&](const std::pair<int, unsigned> &DistAndIdx) {
20702
+ return DistAndIdx.second > *PrevInst;
20680
20703
});
20681
- Set.second.clear();
20682
- Set.first = Idx;
20683
- Set.second.emplace(Idx, 0);
20704
+ StoreSeq.reset(Idx);
20684
20705
// Insert stores that followed previous match to try to vectorize them
20685
20706
// with this store.
20686
- unsigned StartIdx = ItIdx + 1;
20707
+ unsigned StartIdx = *PrevInst + 1;
20687
20708
SmallBitVector UsedStores(Idx - StartIdx);
20688
20709
// Distances to previously found dup store (or this store, since they
20689
20710
// store to the same addresses).
20690
20711
SmallVector<int> Dists(Idx - StartIdx, 0);
20691
- for (const std::pair<unsigned, int> &Pair : reverse(PrevSet)) {
20712
+ for (auto [PtrDist, InstIdx] : reverse(PrevSet)) {
20692
20713
// Do not try to vectorize sequences, we already tried.
20693
- if (VectorizedStores.contains(Stores[Pair.first ]))
20714
+ if (VectorizedStores.contains(Stores[InstIdx ]))
20694
20715
break;
20695
- unsigned BI = Pair.first - StartIdx;
20716
+ unsigned BI = InstIdx - StartIdx;
20696
20717
UsedStores.set(BI);
20697
- Dists[BI] = Pair.second - ItDist ;
20718
+ Dists[BI] = PtrDist - *Diff ;
20698
20719
}
20699
20720
for (unsigned I = StartIdx; I < Idx; ++I) {
20700
20721
unsigned BI = I - StartIdx;
20701
20722
if (UsedStores.test(BI))
20702
- Set.second.emplace (I, Dists[BI]);
20723
+ StoreSeq.insertOrLookup (I, Dists[BI]);
20703
20724
}
20704
20725
return;
20705
20726
}
20706
- auto &Res = SortedStores.emplace_back();
20707
- Res.first = Idx;
20708
- Res.second.emplace(Idx, 0);
20727
+ // We did not find a comparable store, start a new sequence.
20728
+ SortedStores.emplace_back(Idx);
20709
20729
};
20710
20730
Type *PrevValTy = nullptr;
20711
20731
for (auto [I, SI] : enumerate(Stores)) {
@@ -20715,17 +20735,17 @@ bool SLPVectorizerPass::vectorizeStores(
20715
20735
PrevValTy = SI->getValueOperand()->getType();
20716
20736
// Check that we do not try to vectorize stores of different types.
20717
20737
if (PrevValTy != SI->getValueOperand()->getType()) {
20718
- for (auto &Set : SortedStores)
20719
- TryToVectorize(Set.second );
20738
+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20739
+ TryToVectorize(StoreSeq.Instrs );
20720
20740
SortedStores.clear();
20721
20741
PrevValTy = SI->getValueOperand()->getType();
20722
20742
}
20723
20743
FillStoresSet(I, SI);
20724
20744
}
20725
20745
20726
20746
// Final vectorization attempt.
20727
- for (auto &Set : SortedStores)
20728
- TryToVectorize(Set.second );
20747
+ for (RelatedStoreInsts &StoreSeq : SortedStores)
20748
+ TryToVectorize(StoreSeq.Instrs );
20729
20749
20730
20750
return Changed;
20731
20751
}
0 commit comments