Skip to content

Commit 67bd593

Browse files
huntergr-arm and aaryanshukla
authored and committed
[LV] Autovectorization for the all-in-one histogram intrinsic (llvm#91458)
This patch implements limited loop vectorization support for the 'all-in-one' histogram intrinsic. The feature is disabled by default, and when enabled will only vectorize if there are no other users of values in the gather-modify-scatter sequence.
1 parent 5925363 commit 67bd593

File tree

12 files changed

+759
-38
lines changed

12 files changed

+759
-38
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ class MemoryDepChecker {
144144
// on MinDepDistBytes.
145145
BackwardVectorizable,
146146
// Same, but may prevent store-to-load forwarding.
147-
BackwardVectorizableButPreventsForwarding
147+
BackwardVectorizableButPreventsForwarding,
148+
// Access is to a loop loaded value, but is part of a histogram operation.
149+
Histogram
148150
};
149151

150152
/// String version of the types.
@@ -201,7 +203,8 @@ class MemoryDepChecker {
201203
/// Only checks sets with elements in \p CheckDeps.
202204
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
203205
const DenseMap<Value *, SmallVector<const Value *, 16>>
204-
&UnderlyingObjects);
206+
&UnderlyingObjects,
207+
const SmallPtrSetImpl<const Value *> &HistogramPtrs);
205208

206209
/// No memory dependence was encountered that would inhibit
207210
/// vectorization.
@@ -352,7 +355,8 @@ class MemoryDepChecker {
352355
isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
353356
unsigned BIdx,
354357
const DenseMap<Value *, SmallVector<const Value *, 16>>
355-
&UnderlyingObjects);
358+
&UnderlyingObjects,
359+
const SmallPtrSetImpl<const Value *> &HistogramPtrs);
356360

357361
/// Check whether the data dependence could prevent store-load
358362
/// forwarding.
@@ -393,7 +397,8 @@ class MemoryDepChecker {
393397
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
394398
Instruction *BInst,
395399
const DenseMap<Value *, SmallVector<const Value *, 16>>
396-
&UnderlyingObjects);
400+
&UnderlyingObjects,
401+
const SmallPtrSetImpl<const Value *> &HistogramPtrs);
397402
};
398403

399404
class RuntimePointerChecking;
@@ -445,6 +450,15 @@ struct PointerDiffInfo {
445450
NeedsFreeze(NeedsFreeze) {}
446451
};
447452

453+
/// Groups the three instructions that make up one histogram-style operation:
/// the load, the update instruction, and the store.
struct HistogramInfo {
454+
/// The load instruction of the histogram operation.
LoadInst *Load;
455+
/// The instruction recorded as the update step of the operation.
Instruction *Update;
456+
/// The store instruction of the histogram operation.
StoreInst *Store;
457+
458+
/// Record the load, update, and store of a single histogram operation.
HistogramInfo(LoadInst *Load, Instruction *Update, StoreInst *Store)
459+
: Load(Load), Update(Update), Store(Store) {}
460+
};
461+
448462
/// Holds information about the memory runtime legality checks to verify
449463
/// that a group of pointers do not overlap.
450464
class RuntimePointerChecking {
@@ -625,6 +639,13 @@ class RuntimePointerChecking {
625639
/// Checks for both memory dependences and the SCEV predicates contained in the
626640
/// PSE must be emitted in order for the results of this analysis to be valid.
627641
class LoopAccessInfo {
642+
/// Represents whether the memory access dependencies in the loop:
643+
/// * Prohibit vectorization
644+
/// * Allow for vectorization (possibly with runtime checks)
645+
/// * Allow for vectorization (possibly with runtime checks),
646+
/// as long as histogram operations are supported.
647+
enum VecMemPossible {
  CantVec = 0,      // Memory dependencies prohibit vectorization.
  NormalVec = 1,    // Vectorization is allowed (possibly with runtime checks).
  HistogramVec = 2, // Allowed only if histogram operations are supported.
};
648+
628649
public:
629650
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
630651
const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
@@ -636,7 +657,11 @@ class LoopAccessInfo {
636657
/// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
637658
/// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
638659
/// checked.
639-
bool canVectorizeMemory() const { return CanVecMem; }
660+
bool canVectorizeMemory() const { return CanVecMem == NormalVec; }
661+
662+
/// Return true if the cached analysis result is either NormalVec or
/// HistogramVec, i.e. the memory accesses can be vectorized provided that
/// histogram operations are supported.
bool canVectorizeMemoryWithHistogram() const {
663+
return CanVecMem == NormalVec || CanVecMem == HistogramVec;
664+
}
640665

641666
/// Return true if there is a convergent operation in the loop. There may
642667
/// still be reported runtime pointer checks that would be required, but it is
@@ -664,6 +689,10 @@ class LoopAccessInfo {
664689
unsigned getNumStores() const { return NumStores; }
665690
unsigned getNumLoads() const { return NumLoads;}
666691

692+
/// Return a read-only view of the histogram operations recorded in
/// Histograms.
const SmallVectorImpl<HistogramInfo> &getHistograms() const {
693+
return Histograms;
694+
}
695+
667696
/// The diagnostics report generated for the analysis. E.g. why we
668697
/// couldn't analyze the loop.
669698
const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
@@ -715,8 +744,8 @@ class LoopAccessInfo {
715744
private:
716745
/// Analyze the loop. Returns whether the loop's memory accesses prohibit
717746
/// vectorization, allow it, or allow it only with histogram support.
718-
bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
719-
DominatorTree *DT);
747+
VecMemPossible analyzeLoop(AAResults *AA, LoopInfo *LI,
748+
const TargetLibraryInfo *TLI, DominatorTree *DT);
720749

721750
/// Check if the structure of the loop allows it to be analyzed by this
722751
/// pass.
@@ -757,7 +786,7 @@ class LoopAccessInfo {
757786
unsigned NumStores = 0;
758787

759788
/// Cache the result of analyzeLoop.
760-
bool CanVecMem = false;
789+
VecMemPossible CanVecMem = CantVec;
761790
bool HasConvergentOp = false;
762791

763792
/// Indicator that there are two non vectorizable stores to the same uniform
@@ -777,6 +806,13 @@ class LoopAccessInfo {
777806
/// If an access has a symbolic strides, this maps the pointer value to
778807
/// the stride symbol.
779808
DenseMap<Value *, const SCEV *> SymbolicStrides;
809+
810+
/// Holds the load, update, and store instructions for all histogram-style
811+
/// operations found in the loop.
812+
SmallVector<HistogramInfo, 2> Histograms;
813+
814+
/// The set of pointers recorded for the loop's histogram operations.
815+
SmallPtrSet<const Value *, 2> HistogramPtrs;
780816
};
781817

782818
/// Return the SCEV corresponding to a pointer with the symbolic stride

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,23 @@ class LoopVectorizationLegality {
390390
unsigned getNumStores() const { return LAI->getNumStores(); }
391391
unsigned getNumLoads() const { return LAI->getNumLoads(); }
392392

393+
/// Look up the histogram operation that \p I belongs to. Returns a pointer to
/// the first HistogramInfo from LAI whose Load, Update, or Store is \p I, or
/// std::nullopt if no recorded histogram contains \p I.
std::optional<const HistogramInfo *> getHistogramInfo(Instruction *I) const {
394+
for (const HistogramInfo &HGram : LAI->getHistograms())
395+
if (HGram.Load == I || HGram.Update == I || HGram.Store == I)
396+
return &HGram;
397+
398+
return std::nullopt;
399+
}
400+
401+
/// Look up the histogram operation whose store is \p SI. Only the Store
/// field is compared. Returns a pointer to the first matching HistogramInfo
/// from LAI, or std::nullopt if no recorded histogram has \p SI as its store.
std::optional<const HistogramInfo *>
402+
getHistogramForStore(StoreInst *SI) const {
403+
for (const HistogramInfo &HGram : LAI->getHistograms())
404+
if (HGram.Store == SI)
405+
return &HGram;
406+
407+
return std::nullopt;
408+
}
409+
393410
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
394411
return &PSE;
395412
}

0 commit comments

Comments
 (0)