21
21
#include " llvm/ADT/SmallPtrSet.h"
22
22
#include " llvm/ADT/SmallSet.h"
23
23
#include " llvm/ADT/SmallVector.h"
24
+ #include " llvm/ADT/Statistic.h"
24
25
#include " llvm/Analysis/AliasAnalysis.h"
25
26
#include " llvm/Analysis/AliasSetTracker.h"
26
27
#include " llvm/Analysis/LoopAnalysisManager.h"
@@ -69,6 +70,8 @@ using namespace llvm::PatternMatch;
69
70
70
71
#define DEBUG_TYPE " loop-accesses"
71
72
73
+ STATISTIC (HistogramsDetected, " Number of Histograms detected" );
74
+
72
75
static cl::opt<unsigned , true >
73
76
VectorizationFactor (" force-vector-width" , cl::Hidden,
74
77
cl::desc (" Sets the SIMD width. Zero is autoselect." ),
@@ -730,6 +733,23 @@ class AccessAnalysis {
730
733
return UnderlyingObjects;
731
734
}
732
735
736
+ // / Find Histogram counts that match high-level code in loops:
737
+ // / \code
738
+ // / buckets[indices[i]]+=step;
739
+ // / \endcode
740
+ // /
741
+ // / It matches a pattern starting from \p HSt, which Stores to the 'buckets'
742
+ // / array the computed histogram. It uses a BinOp to sum all counts, storing
743
+ // / them using a loop-variant index Load from the 'indices' input array.
744
+ // /
745
+ // / On successful matches it updates the STATISTIC 'HistogramsDetected',
746
+ // / appends the matched Load/BinOp/Store operations to \p Histograms, and
747
+ // / inserts the pointer used to update the histogram into \p HistogramPtrs.
748
+ // / NOTE(review): no hardware-support check is visible in the definition.
749
+ void findHistograms (StoreInst *HSt,
750
+ SmallVectorImpl<HistogramInfo> &Histograms,
751
+ SmallPtrSetImpl<const Value *> &HistogramPtrs);
752
+
733
753
private:
734
754
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1 >> PtrAccessMap;
735
755
@@ -1947,7 +1967,8 @@ getDependenceDistanceStrideAndSize(
1947
1967
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
1948
1968
const DenseMap<Value *, const SCEV *> &Strides,
1949
1969
const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
1950
- PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
1970
+ PredicatedScalarEvolution &PSE, const Loop *InnermostLoop,
1971
+ const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
1951
1972
auto &DL = InnermostLoop->getHeader ()->getModule ()->getDataLayout ();
1952
1973
auto &SE = *PSE.getSE ();
1953
1974
auto [APtr, AIsWrite] = A;
@@ -1965,6 +1986,15 @@ getDependenceDistanceStrideAndSize(
1965
1986
BPtr->getType ()->getPointerAddressSpace ())
1966
1987
return MemoryDepChecker::Dependence::Unknown;
1967
1988
1989
+ // Ignore Histogram count updates as they are handled by the Intrinsic. This
1990
+ // happens when the same pointer is first used to read from and then is used
1991
+ // to write to.
1992
+ if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains (APtr)) {
1993
+ LLVM_DEBUG (dbgs () << " LAA: Histogram: Update is safely ignored. Pointer: "
1994
+ << *APtr);
1995
+ return MemoryDepChecker::Dependence::NoDep;
1996
+ }
1997
+
1968
1998
int64_t StrideAPtr =
1969
1999
getPtrStride (PSE, ATy, APtr, InnermostLoop, Strides, true ).value_or (0 );
1970
2000
int64_t StrideBPtr =
@@ -2018,15 +2048,15 @@ getDependenceDistanceStrideAndSize(
2018
2048
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent (
2019
2049
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
2020
2050
unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
2021
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2022
- &UnderlyingObjects ) {
2051
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2052
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
2023
2053
assert (AIdx < BIdx && " Must pass arguments in program order" );
2024
2054
2025
2055
// Get the dependence distance, stride, type size and what access writes for
2026
2056
// the dependence between A and B.
2027
2057
auto Res = getDependenceDistanceStrideAndSize (
2028
2058
A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE,
2029
- InnermostLoop);
2059
+ InnermostLoop, HistogramPtrs );
2030
2060
if (std::holds_alternative<Dependence::DepType>(Res))
2031
2061
return std::get<Dependence::DepType>(Res);
2032
2062
@@ -2240,8 +2270,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2240
2270
bool MemoryDepChecker::areDepsSafe (
2241
2271
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
2242
2272
const DenseMap<Value *, const SCEV *> &Strides,
2243
- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2244
- &UnderlyingObjects ) {
2273
+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2274
+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
2245
2275
2246
2276
MinDepDistBytes = -1 ;
2247
2277
SmallPtrSet<MemAccessInfo, 8 > Visited;
@@ -2286,7 +2316,7 @@ bool MemoryDepChecker::areDepsSafe(
2286
2316
2287
2317
Dependence::DepType Type =
2288
2318
isDependent (*A.first , A.second , *B.first , B.second , Strides,
2289
- UnderlyingObjects);
2319
+ UnderlyingObjects, HistogramPtrs );
2290
2320
mergeInStatus (Dependence::isSafeForVectorization (Type));
2291
2321
2292
2322
// Gather dependences unless we accumulated MaxDependences
@@ -2622,6 +2652,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2622
2652
// check.
2623
2653
Accesses.buildDependenceSets ();
2624
2654
2655
+ for (StoreInst *ST : Stores)
2656
+ Accesses.findHistograms (ST, Histograms, HistogramPtrs);
2657
+
2625
2658
// Find pointers with computable bounds. We are going to use this information
2626
2659
// to place a runtime bound check.
2627
2660
Value *UncomputablePtr = nullptr ;
@@ -2646,7 +2679,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
2646
2679
LLVM_DEBUG (dbgs () << " LAA: Checking memory dependencies\n " );
2647
2680
CanVecMem = DepChecker->areDepsSafe (
2648
2681
DependentAccesses, Accesses.getDependenciesToCheck (), SymbolicStrides,
2649
- Accesses.getUnderlyingObjects ());
2682
+ Accesses.getUnderlyingObjects (), HistogramPtrs );
2650
2683
2651
2684
if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck ()) {
2652
2685
LLVM_DEBUG (dbgs () << " LAA: Retrying with memory checks\n " );
@@ -3084,6 +3117,99 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3084
3117
return *I.first ->second ;
3085
3118
}
3086
3119
3120
+ void AccessAnalysis::findHistograms (
3121
+ StoreInst *HSt, SmallVectorImpl<HistogramInfo> &Histograms,
3122
+ SmallPtrSetImpl<const Value *> &HistogramPtrs) {
3123
+ LLVM_DEBUG (dbgs () << " LAA: Attempting to match histogram from " << *HSt
3124
+ << " \n " );
3125
+ // Match the histogram update rooted at HSt: the stored value must come from a Binary Operation and the address from an Instruction.
3126
+ Instruction *HPtrInstr = nullptr ;
3127
+ BinaryOperator *HBinOp = nullptr ;
3128
+ if (!match (HSt, m_Store (m_BinOp (HBinOp), m_Instruction (HPtrInstr)))) {
3129
+ LLVM_DEBUG (dbgs () << " \t No BinOp\n " );
3130
+ return ;
3131
+ }
3132
+
3133
+ // BinOp must be an Add or a Sub modifying the bucket value (a load from the store's own address) by a loop
3134
+ // invariant amount. NOTE(review): m_Value accepts any increment; invariance is not verified in this function.
3135
+ // FIXME: We assume the loop invariant term is on the RHS.
3136
+ // Fine for an immediate/constant, but maybe not a generic value?
3137
+ Value *HIncVal = nullptr ;
3138
+ if (!match (HBinOp, m_Add (m_Load (m_Specific (HPtrInstr)), m_Value (HIncVal))) &&
3139
+ !match (HBinOp, m_Sub (m_Load (m_Specific (HPtrInstr)), m_Value (HIncVal)))) {
3140
+ LLVM_DEBUG (dbgs () << " \t No matching load\n " );
3141
+ return ;
3142
+ }
3143
+ Instruction *IndexedLoad = cast<Instruction>(HBinOp->getOperand (0 ));
3144
+
3145
+ // The address to store to must be calculated through a GEP Instruction with at most two operands.
3146
+ // FIXME: Support GEPs with more operands.
3147
+ GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
3148
+ if (!HPtr || HPtr->getNumOperands () > 2 ) {
3149
+ LLVM_DEBUG (dbgs () << " \t Too many GEP operands\n " );
3150
+ return ;
3151
+ }
3152
+
3153
+ // Check that the index is calculated by loading from another array. Ignore
3154
+ // any extensions.
3155
+ // FIXME: Support indices from other sources than a linear load from memory?
3156
+ Value *HIdx = HPtr->getOperand (1 );
3157
+ Instruction *IdxInst = nullptr ;
3158
+ // FIXME: Can this fail? Maybe if IdxInst isn't an instruction. Just need to
3159
+ // look through extensions, find another way?
3160
+ if (!match (HIdx, m_ZExtOrSExtOrSelf (m_Instruction (IdxInst))))
3161
+ return ;
3162
+
3163
+ // Currently restricting this to linear addressing when loading indices: the index load's pointer must be a SCEVAddRecExpr.
3164
+ LoadInst *VLoad = dyn_cast<LoadInst>(IdxInst);
3165
+ Value *VPtrVal;
3166
+ if (!VLoad || !match (VLoad, m_Load (m_Value (VPtrVal)))) {
3167
+ LLVM_DEBUG (dbgs () << " \t Bad Index Load\n " );
3168
+ return ;
3169
+ }
3170
+
3171
+ if (!isa<SCEVAddRecExpr>(PSE.getSCEV (VPtrVal))) {
3172
+ LLVM_DEBUG (dbgs () << " \t Cannot determine index load stride\n " );
3173
+ return ;
3174
+ }
3175
+
3176
+ // Only 32- and 64-bit integer index loads are accepted. FIXME: support smaller
3177
+ // types of input arrays; integers can be promoted for codegen.
3178
+ Type *VLoadTy = VLoad->getType ();
3179
+ if (!VLoadTy->isIntegerTy () || (VLoadTy->getScalarSizeInBits () != 32 &&
3180
+ VLoadTy->getScalarSizeInBits () != 64 )) {
3181
+ LLVM_DEBUG (dbgs () << " \t Unsupported bucket type: " << *VLoadTy << " \n " );
3182
+ return ;
3183
+ }
3184
+
3185
+ // Ensure we'll have the same mask by checking that all parts of the histogram
3186
+ // are in the same block.
3187
+ // FIXME: Could use dominance checks instead?
3188
+ if (IndexedLoad->getParent () != HBinOp->getParent () ||
3189
+ IndexedLoad->getParent () != HSt->getParent ()) {
3190
+ LLVM_DEBUG (dbgs () << " \t Different parent blocks\n " );
3191
+ return ;
3192
+ }
3193
+
3194
+ // A histogram pointer may only alias to itself, and must only have two uses,
3195
+ // the load and the store. NOTE(review): the use-count test does not depend on AS and could be hoisted out of the loop.
3196
+ for (AliasSet &AS : AST)
3197
+ if (AS.isMustAlias () || AS.isMayAlias ())
3198
+ if ((is_contained (AS.getPointers (), HPtr) && AS.size () > 1 ) ||
3199
+ HPtr->getNumUses () != 2 ) {
3200
+ LLVM_DEBUG (dbgs () << " \t Aliasing problem\n " );
3201
+ return ;
3202
+ }
3203
+
3204
+ LLVM_DEBUG (dbgs () << " LAA: Found Histogram Operation: " << *HBinOp << " \n " );
3205
+ HistogramsDetected++;
3206
+
3207
+ // Store the operations that make up the histogram (bucket load, update BinOp, store).
3208
+ Histograms.emplace_back (IndexedLoad, HBinOp, HSt);
3209
+ // Store pointers used to write those counts in the computed histogram.
3210
+ HistogramPtrs.insert (HPtr);
3211
+ }
3212
+
3087
3213
bool LoopAccessInfoManager::invalidate (
3088
3214
Function &F, const PreservedAnalyses &PA,
3089
3215
FunctionAnalysisManager::Invalidator &Inv) {
0 commit comments