@@ -144,7 +144,9 @@ class MemoryDepChecker {
144
144
// on MinDepDistBytes.
145
145
BackwardVectorizable,
146
146
// Same, but may prevent store-to-load forwarding.
147
- BackwardVectorizableButPreventsForwarding
147
+ BackwardVectorizableButPreventsForwarding,
148
+ // Access is to a loop-loaded value, but is part of a histogram operation.
149
+ Histogram
148
150
};
149
151
150
152
// / String version of the types.
@@ -201,7 +203,8 @@ class MemoryDepChecker {
201
203
// / Only checks sets with elements in \p CheckDeps.
202
204
bool areDepsSafe (DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
203
205
const DenseMap<Value *, SmallVector<const Value *, 16 >>
204
- &UnderlyingObjects);
206
+ &UnderlyingObjects,
207
+ const SmallPtrSetImpl<const Value *> &HistogramPtrs);
205
208
206
209
// / No memory dependence was encountered that would inhibit
207
210
// / vectorization.
@@ -352,7 +355,8 @@ class MemoryDepChecker {
352
355
isDependent (const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
353
356
unsigned BIdx,
354
357
const DenseMap<Value *, SmallVector<const Value *, 16 >>
355
- &UnderlyingObjects);
358
+ &UnderlyingObjects,
359
+ const SmallPtrSetImpl<const Value *> &HistogramPtrs);
356
360
357
361
// / Check whether the data dependence could prevent store-load
358
362
// / forwarding.
@@ -393,7 +397,8 @@ class MemoryDepChecker {
393
397
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
394
398
Instruction *BInst,
395
399
const DenseMap<Value *, SmallVector<const Value *, 16 >>
396
- &UnderlyingObjects);
400
+ &UnderlyingObjects,
401
+ const SmallPtrSetImpl<const Value *> &HistogramPtrs);
397
402
};
398
403
399
404
class RuntimePointerChecking ;
@@ -445,6 +450,15 @@ struct PointerDiffInfo {
445
450
NeedsFreeze (NeedsFreeze) {}
446
451
};
447
452
453
+ struct HistogramInfo { // The instructions making up one histogram-style operation.
454
+ LoadInst *Load; // Load instruction of the operation.
455
+ Instruction *Update; // Update instruction of the operation.
456
+ StoreInst *Store; // Store instruction of the operation.
457
+
458
+ HistogramInfo (LoadInst *Load, Instruction *Update, StoreInst *Store)
459
+ : Load(Load), Update(Update), Store(Store) {}
460
+ };
461
+
448
462
// / Holds information about the memory runtime legality checks to verify
449
463
// / that a group of pointers do not overlap.
450
464
class RuntimePointerChecking {
@@ -625,6 +639,13 @@ class RuntimePointerChecking {
625
639
// / Checks for both memory dependences and the SCEV predicates contained in the
626
640
// / PSE must be emitted in order for the results of this analysis to be valid.
627
641
class LoopAccessInfo {
642
+ // / Describes what the memory access dependencies in the loop permit:
643
+ // / * CantVec: they prohibit vectorization.
644
+ // / * NormalVec: they allow for vectorization (possibly with runtime checks).
645
+ // / * HistogramVec: they allow for vectorization (possibly with runtime checks),
646
+ // / but only as long as histogram operations are supported.
647
+ enum VecMemPossible { CantVec = 0 , NormalVec = 1 , HistogramVec = 2 };
648
+
628
649
public:
629
650
LoopAccessInfo (Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
630
651
const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
@@ -636,7 +657,11 @@ class LoopAccessInfo {
636
657
// / hasStoreStoreDependenceInvolvingLoopInvariantAddress and
637
658
// / hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
638
659
// / checked.
639
- bool canVectorizeMemory () const { return CanVecMem; }
660
+ bool canVectorizeMemory () const { return CanVecMem == NormalVec; } // False for CantVec and for HistogramVec.
661
+
662
+ bool canVectorizeMemoryWithHistogram () const { // Like canVectorizeMemory(), but also accepts HistogramVec.
663
+ return CanVecMem == NormalVec || CanVecMem == HistogramVec; // i.e. anything except CantVec
664
+ }
640
665
641
666
// / Return true if there is a convergent operation in the loop. There may
642
667
// / still be reported runtime pointer checks that would be required, but it is
@@ -664,6 +689,10 @@ class LoopAccessInfo {
664
689
unsigned getNumStores () const { return NumStores; }
665
690
unsigned getNumLoads () const { return NumLoads;}
666
691
692
+ const SmallVectorImpl<HistogramInfo> &getHistograms () const { // Read-only access to the Histograms member.
693
+ return Histograms;
694
+ }
695
+
667
696
// / The diagnostics report generated for the analysis. E.g. why we
668
697
// / couldn't analyze the loop.
669
698
const OptimizationRemarkAnalysis *getReport () const { return Report.get (); }
@@ -715,8 +744,8 @@ class LoopAccessInfo {
715
744
private:
716
745
// / Analyze the loop. Returns whether the memory accesses in the loop cannot
717
746
// / be vectorized, can be vectorized, or can be vectorized only with histogram support.
718
- bool analyzeLoop (AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI ,
719
- DominatorTree *DT);
747
+ VecMemPossible analyzeLoop (AAResults *AA, LoopInfo *LI,
748
+ const TargetLibraryInfo *TLI, DominatorTree *DT);
720
749
721
750
// / Check if the structure of the loop allows it to be analyzed by this
722
751
// / pass.
@@ -757,7 +786,7 @@ class LoopAccessInfo {
757
786
unsigned NumStores = 0 ;
758
787
759
788
// / Cache the result of analyzeLoop.
760
- bool CanVecMem = false ;
789
+ VecMemPossible CanVecMem = CantVec ;
761
790
bool HasConvergentOp = false ;
762
791
763
792
// / Indicator that there are two non vectorizable stores to the same uniform
@@ -777,6 +806,13 @@ class LoopAccessInfo {
777
806
// / If an access has a symbolic strides, this maps the pointer value to
778
807
// / the stride symbol.
779
808
DenseMap<Value *, const SCEV *> SymbolicStrides;
809
+
810
+ // / Holds the load, update, and store instructions for all histogram-style
811
+ // / operations found in the loop.
812
+ SmallVector<HistogramInfo, 2 > Histograms;
813
+
814
+ // / The set of pointers that have been identified as histogram pointers.
815
+ SmallPtrSet<const Value *, 2 > HistogramPtrs;
780
816
};
781
817
782
818
// / Return the SCEV corresponding to a pointer with the symbolic stride
0 commit comments