-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Revert "[LV] Autovectorization for the all-in-one histogram intrinsic" #98493
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis Author: Graham Hunter (huntergr-arm) Changes: Reverts llvm/llvm-project#91458 to deal with post-commit reviewer requests. Patch is 54.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98493.diff 12 Files Affected:
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 9cca70459edef..f6bb044392938 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -144,9 +144,7 @@ class MemoryDepChecker {
// on MinDepDistBytes.
BackwardVectorizable,
// Same, but may prevent store-to-load forwarding.
- BackwardVectorizableButPreventsForwarding,
- // Access is to a loop loaded value, but is part of a histogram operation.
- Histogram
+ BackwardVectorizableButPreventsForwarding
};
/// String version of the types.
@@ -203,8 +201,7 @@ class MemoryDepChecker {
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs);
+ &UnderlyingObjects);
/// No memory dependence was encountered that would inhibit
/// vectorization.
@@ -355,8 +352,7 @@ class MemoryDepChecker {
isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs);
+ &UnderlyingObjects);
/// Check whether the data dependence could prevent store-load
/// forwarding.
@@ -397,8 +393,7 @@ class MemoryDepChecker {
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
Instruction *BInst,
const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs);
+ &UnderlyingObjects);
};
class RuntimePointerChecking;
@@ -450,15 +445,6 @@ struct PointerDiffInfo {
NeedsFreeze(NeedsFreeze) {}
};
-struct HistogramInfo {
- LoadInst *Load;
- Instruction *Update;
- StoreInst *Store;
-
- HistogramInfo(LoadInst *Load, Instruction *Update, StoreInst *Store)
- : Load(Load), Update(Update), Store(Store) {}
-};
-
/// Holds information about the memory runtime legality checks to verify
/// that a group of pointers do not overlap.
class RuntimePointerChecking {
@@ -639,13 +625,6 @@ class RuntimePointerChecking {
/// Checks for both memory dependences and the SCEV predicates contained in the
/// PSE must be emitted in order for the results of this analysis to be valid.
class LoopAccessInfo {
- /// Represents whether the memory access dependencies in the loop:
- /// * Prohibit vectorization
- /// * Allow for vectorization (possibly with runtime checks)
- /// * Allow for vectorization (possibly with runtime checks),
- /// as long as histogram operations are supported.
- enum VecMemPossible { CantVec = 0, NormalVec = 1, HistogramVec = 2 };
-
public:
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
@@ -657,11 +636,7 @@ class LoopAccessInfo {
/// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
/// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
/// checked.
- bool canVectorizeMemory() const { return CanVecMem == NormalVec; }
-
- bool canVectorizeMemoryWithHistogram() const {
- return CanVecMem == NormalVec || CanVecMem == HistogramVec;
- }
+ bool canVectorizeMemory() const { return CanVecMem; }
/// Return true if there is a convergent operation in the loop. There may
/// still be reported runtime pointer checks that would be required, but it is
@@ -689,10 +664,6 @@ class LoopAccessInfo {
unsigned getNumStores() const { return NumStores; }
unsigned getNumLoads() const { return NumLoads;}
- const SmallVectorImpl<HistogramInfo> &getHistograms() const {
- return Histograms;
- }
-
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
@@ -744,8 +715,8 @@ class LoopAccessInfo {
private:
/// Analyze the loop. Returns true if all memory access in the loop can be
/// vectorized.
- VecMemPossible analyzeLoop(AAResults *AA, LoopInfo *LI,
- const TargetLibraryInfo *TLI, DominatorTree *DT);
+ bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
+ DominatorTree *DT);
/// Check if the structure of the loop allows it to be analyzed by this
/// pass.
@@ -786,7 +757,7 @@ class LoopAccessInfo {
unsigned NumStores = 0;
/// Cache the result of analyzeLoop.
- VecMemPossible CanVecMem = CantVec;
+ bool CanVecMem = false;
bool HasConvergentOp = false;
/// Indicator that there are two non vectorizable stores to the same uniform
@@ -806,13 +777,6 @@ class LoopAccessInfo {
/// If an access has a symbolic strides, this maps the pointer value to
/// the stride symbol.
DenseMap<Value *, const SCEV *> SymbolicStrides;
-
- /// Holds the load, update, and store instructions for all histogram-style
- /// operations found in the loop.
- SmallVector<HistogramInfo, 2> Histograms;
-
- /// Storing Histogram Pointers
- SmallPtrSet<const Value *, 2> HistogramPtrs;
};
/// Return the SCEV corresponding to a pointer with the symbolic stride
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 4361e0f2cf038..2ff17bd2f7a71 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -390,23 +390,6 @@ class LoopVectorizationLegality {
unsigned getNumStores() const { return LAI->getNumStores(); }
unsigned getNumLoads() const { return LAI->getNumLoads(); }
- std::optional<const HistogramInfo *> getHistogramInfo(Instruction *I) const {
- for (const HistogramInfo &HGram : LAI->getHistograms())
- if (HGram.Load == I || HGram.Update == I || HGram.Store == I)
- return &HGram;
-
- return std::nullopt;
- }
-
- std::optional<const HistogramInfo *>
- getHistogramForStore(StoreInst *SI) const {
- for (const HistogramInfo &HGram : LAI->getHistograms())
- if (HGram.Store == SI)
- return &HGram;
-
- return std::nullopt;
- }
-
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index dd9827003d200..018861a665c4c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -21,7 +21,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
@@ -71,8 +70,6 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-accesses"
-STATISTIC(HistogramsDetected, "Number of Histograms detected");
-
static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."),
@@ -735,23 +732,6 @@ class AccessAnalysis {
return UnderlyingObjects;
}
- /// Find Histogram counts that match high-level code in loops:
- /// \code
- /// buckets[indices[i]]+=step;
- /// \endcode
- ///
- /// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
- /// array the computed histogram. It uses a BinOp to sum all counts, storing
- /// them using a loop-variant index Load from the 'indices' input array.
- ///
- /// On successful matches it updates the STATISTIC 'HistogramsDetected',
- /// regardless of hardware support. When there is support, it additionally
- /// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
- /// used to update histogram in \p HistogramPtrs.
- void findHistograms(StoreInst *HSt, Loop *TheLoop,
- SmallVectorImpl<HistogramInfo> &Histograms,
- SmallPtrSetImpl<const Value *> &HistogramPtrs);
-
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
@@ -1718,7 +1698,6 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
- case Histogram:
return VectorizationSafetyStatus::Safe;
case Unknown:
@@ -1739,7 +1718,6 @@ bool MemoryDepChecker::Dependence::isBackward() const {
case ForwardButPreventsForwarding:
case Unknown:
case IndirectUnsafe:
- case Histogram:
return false;
case BackwardVectorizable:
@@ -1751,7 +1729,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
}
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
- return isBackward() || Type == Unknown || Type == Histogram;
+ return isBackward() || Type == Unknown;
}
bool MemoryDepChecker::Dependence::isForward() const {
@@ -1766,7 +1744,6 @@ bool MemoryDepChecker::Dependence::isForward() const {
case Backward:
case BackwardVectorizableButPreventsForwarding:
case IndirectUnsafe:
- case Histogram:
return false;
}
llvm_unreachable("unexpected DepType!");
@@ -1936,8 +1913,8 @@ std::variant<MemoryDepChecker::Dependence::DepType,
MemoryDepChecker::getDependenceDistanceStrideAndSize(
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
- const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
auto &DL = InnermostLoop->getHeader()->getDataLayout();
auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
@@ -1955,12 +1932,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- // Ignore Histogram count updates as they are handled by the Intrinsic. This
- // happens when the same pointer is first used to read from and then is used
- // to write to.
- if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains(APtr))
- return MemoryDepChecker::Dependence::Histogram;
-
int64_t StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
.value_or(0);
@@ -2037,14 +2008,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
- const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
assert(AIdx < BIdx && "Must pass arguments in program order");
// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
auto Res = getDependenceDistanceStrideAndSize(
- A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects, HistogramPtrs);
+ A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);
@@ -2280,8 +2251,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
bool MemoryDepChecker::areDepsSafe(
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
- const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2324,9 +2295,8 @@ bool MemoryDepChecker::areDepsSafe(
if (*I1 > *I2)
std::swap(A, B);
- Dependence::DepType Type =
- isDependent(*A.first, A.second, *B.first, B.second,
- UnderlyingObjects, HistogramPtrs);
+ Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
+ B.second, UnderlyingObjects);
mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
@@ -2377,8 +2347,7 @@ const char *MemoryDepChecker::Dependence::DepName[] = {
"ForwardButPreventsForwarding",
"Backward",
"BackwardVectorizable",
- "BackwardVectorizableButPreventsForwarding",
- "Histogram"};
+ "BackwardVectorizableButPreventsForwarding"};
void MemoryDepChecker::Dependence::print(
raw_ostream &OS, unsigned Depth,
@@ -2426,9 +2395,9 @@ bool LoopAccessInfo::canAnalyzeLoop() {
return true;
}
-LoopAccessInfo::VecMemPossible
-LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
- const TargetLibraryInfo *TLI, DominatorTree *DT) {
+bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
+ const TargetLibraryInfo *TLI,
+ DominatorTree *DT) {
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
@@ -2468,7 +2437,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// With both a non-vectorizable memory instruction and a convergent
// operation, found in this loop, no reason to continue the search.
if (HasComplexMemInst && HasConvergentOp)
- return CantVec;
+ return false;
// Avoid hitting recordAnalysis multiple times.
if (HasComplexMemInst)
@@ -2544,7 +2513,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
} // Next block.
if (HasComplexMemInst)
- return CantVec;
+ return false;
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -2553,7 +2522,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// care if the pointers are *restrict*.
if (!Stores.size()) {
LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
- return NormalVec;
+ return true;
}
MemoryDepChecker::DepCandidates DependentAccesses;
@@ -2606,7 +2575,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
LLVM_DEBUG(
dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
- return NormalVec;
+ return true;
}
for (LoadInst *LD : Loads) {
@@ -2653,16 +2622,13 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// other reads in this loop then is it safe to vectorize.
if (NumReadWrites == 1 && NumReads == 0) {
LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
- return NormalVec;
+ return true;
}
// Build dependence sets and check whether we need a runtime pointer bounds
// check.
Accesses.buildDependenceSets();
- for (StoreInst *ST : Stores)
- Accesses.findHistograms(ST, TheLoop, Histograms, HistogramPtrs);
-
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
Value *UncomputablePtr = nullptr;
@@ -2675,7 +2641,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
<< "cannot identify array bounds";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
- return CantVec;
+ return false;
}
LLVM_DEBUG(
@@ -2684,9 +2650,9 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
bool DepsAreSafe = true;
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
- DepsAreSafe = DepChecker->areDepsSafe(
- DependentAccesses, Accesses.getDependenciesToCheck(),
- Accesses.getUnderlyingObjects(), HistogramPtrs);
+ DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
+ Accesses.getDependenciesToCheck(),
+ Accesses.getUnderlyingObjects());
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
@@ -2708,7 +2674,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
recordAnalysis("CantCheckMemDepsAtRunTime", I)
<< "cannot check memory dependencies at runtime";
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
- return CantVec;
+ return false;
}
DepsAreSafe = true;
}
@@ -2719,7 +2685,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
<< "cannot add control dependency to convergent operation";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n");
- return CantVec;
+ return false;
}
if (DepsAreSafe) {
@@ -2727,11 +2693,11 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
- return Histograms.empty() ? NormalVec : HistogramVec;
+ return true;
}
emitUnsafeDependenceRemark();
- return CantVec;
+ return false;
}
void LoopAccessInfo::emitUnsafeDependenceRemark() {
@@ -2791,9 +2757,6 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
case MemoryDepChecker::Dependence::Unknown:
R << "\nUnknown data dependence.";
break;
- case MemoryDepChecker::Dependence::Histogram:
- R << "\nHistogram data dependence.";
- break;
}
if (Instruction *I = Dep.getSource(getDepChecker())) {
@@ -3065,7 +3028,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
}
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
- if (CanVecMem != CantVec) {
+ if (CanVecMem) {
OS.indent(Depth) << "Memory dependences are safe";
const MemoryDepChecker &DC = getDepChecker();
if (!DC.isSafeForAnyVectorWidth())
@@ -3138,79 +3101,6 @@ void LoopAccessInfoManager::clear() {
LoopAccessInfoMap.erase(L);
}
-void AccessAnalysis::findHistograms(
- StoreInst *HSt, Loop *TheLoop, SmallVectorImpl<HistogramInfo> &Histograms,
- SmallPtrSetImpl<const Value *> &HistogramPtrs) {
-
- // Store value must come from a Binary Operation.
- Instruction *HPtrInstr = nullptr;
- BinaryOperator *HBinOp = nullptr;
- if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
- return;
-
- // BinOp must be an Add or a Sub modifying the bucket value by a
- // loop invariant amount.
- // FIXME: We assume the loop invariant term is on the RHS.
- // Fine for an immediate/constant, but maybe not a generic value?
- Value *HIncVal = nullptr;
- if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
- !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
- return;
-
- // Make sure the increment value is loop invariant.
- if (!TheLoop->isLoopInvariant(HIncVal))
- return;
-
- // The address to store is calculated through a GEP Instruction.
- // FIXME: Support GEPs with more operands.
- GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
- if (!HPtr || HPtr->getNumOperands() > 2)
- return;
-
- // Check that the index is calculated by loading from another array. Ignore
- // any extensions.
- // FIXME: Support indices from other sources that a linear load from memory?
- Value *HIdx = HPtr->getOperand(1);
- Instruction *IdxInst = nullptr;
- if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Instruction(IdxInst))))
- return;
-
- // Currently restricting this to linear addressing when loading indices.
- LoadInst *VLoad = dyn_cas...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Graham Hunter (huntergr-arm) Changes: Reverts llvm/llvm-project#91458 to deal with post-commit reviewer requests. Patch is 54.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98493.diff 12 Files Affected:
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 9cca70459edef..f6bb044392938 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -144,9 +144,7 @@ class MemoryDepChecker {
// on MinDepDistBytes.
BackwardVectorizable,
// Same, but may prevent store-to-load forwarding.
- BackwardVectorizableButPreventsForwarding,
- // Access is to a loop loaded value, but is part of a histogram operation.
- Histogram
+ BackwardVectorizableButPreventsForwarding
};
/// String version of the types.
@@ -203,8 +201,7 @@ class MemoryDepChecker {
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs);
+ &UnderlyingObjects);
/// No memory dependence was encountered that would inhibit
/// vectorization.
@@ -355,8 +352,7 @@ class MemoryDepChecker {
isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs);
+ &UnderlyingObjects);
/// Check whether the data dependence could prevent store-load
/// forwarding.
@@ -397,8 +393,7 @@ class MemoryDepChecker {
const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
Instruction *BInst,
const DenseMap<Value *, SmallVector<const Value *, 16>>
- &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs);
+ &UnderlyingObjects);
};
class RuntimePointerChecking;
@@ -450,15 +445,6 @@ struct PointerDiffInfo {
NeedsFreeze(NeedsFreeze) {}
};
-struct HistogramInfo {
- LoadInst *Load;
- Instruction *Update;
- StoreInst *Store;
-
- HistogramInfo(LoadInst *Load, Instruction *Update, StoreInst *Store)
- : Load(Load), Update(Update), Store(Store) {}
-};
-
/// Holds information about the memory runtime legality checks to verify
/// that a group of pointers do not overlap.
class RuntimePointerChecking {
@@ -639,13 +625,6 @@ class RuntimePointerChecking {
/// Checks for both memory dependences and the SCEV predicates contained in the
/// PSE must be emitted in order for the results of this analysis to be valid.
class LoopAccessInfo {
- /// Represents whether the memory access dependencies in the loop:
- /// * Prohibit vectorization
- /// * Allow for vectorization (possibly with runtime checks)
- /// * Allow for vectorization (possibly with runtime checks),
- /// as long as histogram operations are supported.
- enum VecMemPossible { CantVec = 0, NormalVec = 1, HistogramVec = 2 };
-
public:
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI,
const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
@@ -657,11 +636,7 @@ class LoopAccessInfo {
/// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
/// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
/// checked.
- bool canVectorizeMemory() const { return CanVecMem == NormalVec; }
-
- bool canVectorizeMemoryWithHistogram() const {
- return CanVecMem == NormalVec || CanVecMem == HistogramVec;
- }
+ bool canVectorizeMemory() const { return CanVecMem; }
/// Return true if there is a convergent operation in the loop. There may
/// still be reported runtime pointer checks that would be required, but it is
@@ -689,10 +664,6 @@ class LoopAccessInfo {
unsigned getNumStores() const { return NumStores; }
unsigned getNumLoads() const { return NumLoads;}
- const SmallVectorImpl<HistogramInfo> &getHistograms() const {
- return Histograms;
- }
-
/// The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
@@ -744,8 +715,8 @@ class LoopAccessInfo {
private:
/// Analyze the loop. Returns true if all memory access in the loop can be
/// vectorized.
- VecMemPossible analyzeLoop(AAResults *AA, LoopInfo *LI,
- const TargetLibraryInfo *TLI, DominatorTree *DT);
+ bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
+ DominatorTree *DT);
/// Check if the structure of the loop allows it to be analyzed by this
/// pass.
@@ -786,7 +757,7 @@ class LoopAccessInfo {
unsigned NumStores = 0;
/// Cache the result of analyzeLoop.
- VecMemPossible CanVecMem = CantVec;
+ bool CanVecMem = false;
bool HasConvergentOp = false;
/// Indicator that there are two non vectorizable stores to the same uniform
@@ -806,13 +777,6 @@ class LoopAccessInfo {
/// If an access has a symbolic strides, this maps the pointer value to
/// the stride symbol.
DenseMap<Value *, const SCEV *> SymbolicStrides;
-
- /// Holds the load, update, and store instructions for all histogram-style
- /// operations found in the loop.
- SmallVector<HistogramInfo, 2> Histograms;
-
- /// Storing Histogram Pointers
- SmallPtrSet<const Value *, 2> HistogramPtrs;
};
/// Return the SCEV corresponding to a pointer with the symbolic stride
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 4361e0f2cf038..2ff17bd2f7a71 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -390,23 +390,6 @@ class LoopVectorizationLegality {
unsigned getNumStores() const { return LAI->getNumStores(); }
unsigned getNumLoads() const { return LAI->getNumLoads(); }
- std::optional<const HistogramInfo *> getHistogramInfo(Instruction *I) const {
- for (const HistogramInfo &HGram : LAI->getHistograms())
- if (HGram.Load == I || HGram.Update == I || HGram.Store == I)
- return &HGram;
-
- return std::nullopt;
- }
-
- std::optional<const HistogramInfo *>
- getHistogramForStore(StoreInst *SI) const {
- for (const HistogramInfo &HGram : LAI->getHistograms())
- if (HGram.Store == SI)
- return &HGram;
-
- return std::nullopt;
- }
-
PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
return &PSE;
}
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index dd9827003d200..018861a665c4c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -21,7 +21,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
@@ -71,8 +70,6 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "loop-accesses"
-STATISTIC(HistogramsDetected, "Number of Histograms detected");
-
static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."),
@@ -735,23 +732,6 @@ class AccessAnalysis {
return UnderlyingObjects;
}
- /// Find Histogram counts that match high-level code in loops:
- /// \code
- /// buckets[indices[i]]+=step;
- /// \endcode
- ///
- /// It matches a pattern starting from \p HSt, which Stores to the 'buckets'
- /// array the computed histogram. It uses a BinOp to sum all counts, storing
- /// them using a loop-variant index Load from the 'indices' input array.
- ///
- /// On successful matches it updates the STATISTIC 'HistogramsDetected',
- /// regardless of hardware support. When there is support, it additionally
- /// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
- /// used to update histogram in \p HistogramPtrs.
- void findHistograms(StoreInst *HSt, Loop *TheLoop,
- SmallVectorImpl<HistogramInfo> &Histograms,
- SmallPtrSetImpl<const Value *> &HistogramPtrs);
-
private:
typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
@@ -1718,7 +1698,6 @@ MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
case NoDep:
case Forward:
case BackwardVectorizable:
- case Histogram:
return VectorizationSafetyStatus::Safe;
case Unknown:
@@ -1739,7 +1718,6 @@ bool MemoryDepChecker::Dependence::isBackward() const {
case ForwardButPreventsForwarding:
case Unknown:
case IndirectUnsafe:
- case Histogram:
return false;
case BackwardVectorizable:
@@ -1751,7 +1729,7 @@ bool MemoryDepChecker::Dependence::isBackward() const {
}
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
- return isBackward() || Type == Unknown || Type == Histogram;
+ return isBackward() || Type == Unknown;
}
bool MemoryDepChecker::Dependence::isForward() const {
@@ -1766,7 +1744,6 @@ bool MemoryDepChecker::Dependence::isForward() const {
case Backward:
case BackwardVectorizableButPreventsForwarding:
case IndirectUnsafe:
- case Histogram:
return false;
}
llvm_unreachable("unexpected DepType!");
@@ -1936,8 +1913,8 @@ std::variant<MemoryDepChecker::Dependence::DepType,
MemoryDepChecker::getDependenceDistanceStrideAndSize(
const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
- const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
auto &DL = InnermostLoop->getHeader()->getDataLayout();
auto &SE = *PSE.getSE();
auto [APtr, AIsWrite] = A;
@@ -1955,12 +1932,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;
- // Ignore Histogram count updates as they are handled by the Intrinsic. This
- // happens when the same pointer is first used to read from and then is used
- // to write to.
- if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains(APtr))
- return MemoryDepChecker::Dependence::Histogram;
-
int64_t StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true)
.value_or(0);
@@ -2037,14 +2008,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
unsigned BIdx,
- const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
assert(AIdx < BIdx && "Must pass arguments in program order");
// Get the dependence distance, stride, type size and what access writes for
// the dependence between A and B.
auto Res = getDependenceDistanceStrideAndSize(
- A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects, HistogramPtrs);
+ A, InstMap[AIdx], B, InstMap[BIdx], UnderlyingObjects);
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);
@@ -2280,8 +2251,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
bool MemoryDepChecker::areDepsSafe(
DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
- const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects,
- const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
+ const DenseMap<Value *, SmallVector<const Value *, 16>>
+ &UnderlyingObjects) {
MinDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2324,9 +2295,8 @@ bool MemoryDepChecker::areDepsSafe(
if (*I1 > *I2)
std::swap(A, B);
- Dependence::DepType Type =
- isDependent(*A.first, A.second, *B.first, B.second,
- UnderlyingObjects, HistogramPtrs);
+ Dependence::DepType Type = isDependent(*A.first, A.second, *B.first,
+ B.second, UnderlyingObjects);
mergeInStatus(Dependence::isSafeForVectorization(Type));
// Gather dependences unless we accumulated MaxDependences
@@ -2377,8 +2347,7 @@ const char *MemoryDepChecker::Dependence::DepName[] = {
"ForwardButPreventsForwarding",
"Backward",
"BackwardVectorizable",
- "BackwardVectorizableButPreventsForwarding",
- "Histogram"};
+ "BackwardVectorizableButPreventsForwarding"};
void MemoryDepChecker::Dependence::print(
raw_ostream &OS, unsigned Depth,
@@ -2426,9 +2395,9 @@ bool LoopAccessInfo::canAnalyzeLoop() {
return true;
}
-LoopAccessInfo::VecMemPossible
-LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
- const TargetLibraryInfo *TLI, DominatorTree *DT) {
+bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
+ const TargetLibraryInfo *TLI,
+ DominatorTree *DT) {
// Holds the Load and Store instructions.
SmallVector<LoadInst *, 16> Loads;
SmallVector<StoreInst *, 16> Stores;
@@ -2468,7 +2437,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// With both a non-vectorizable memory instruction and a convergent
// operation, found in this loop, no reason to continue the search.
if (HasComplexMemInst && HasConvergentOp)
- return CantVec;
+ return false;
// Avoid hitting recordAnalysis multiple times.
if (HasComplexMemInst)
@@ -2544,7 +2513,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
} // Next block.
if (HasComplexMemInst)
- return CantVec;
+ return false;
// Now we have two lists that hold the loads and the stores.
// Next, we find the pointers that they use.
@@ -2553,7 +2522,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// care if the pointers are *restrict*.
if (!Stores.size()) {
LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
- return NormalVec;
+ return true;
}
MemoryDepChecker::DepCandidates DependentAccesses;
@@ -2606,7 +2575,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
LLVM_DEBUG(
dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
<< "checks.\n");
- return NormalVec;
+ return true;
}
for (LoadInst *LD : Loads) {
@@ -2653,16 +2622,13 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// other reads in this loop then is it safe to vectorize.
if (NumReadWrites == 1 && NumReads == 0) {
LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
- return NormalVec;
+ return true;
}
// Build dependence sets and check whether we need a runtime pointer bounds
// check.
Accesses.buildDependenceSets();
- for (StoreInst *ST : Stores)
- Accesses.findHistograms(ST, TheLoop, Histograms, HistogramPtrs);
-
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
Value *UncomputablePtr = nullptr;
@@ -2675,7 +2641,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
<< "cannot identify array bounds";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
- return CantVec;
+ return false;
}
LLVM_DEBUG(
@@ -2684,9 +2650,9 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
bool DepsAreSafe = true;
if (Accesses.isDependencyCheckNeeded()) {
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
- DepsAreSafe = DepChecker->areDepsSafe(
- DependentAccesses, Accesses.getDependenciesToCheck(),
- Accesses.getUnderlyingObjects(), HistogramPtrs);
+ DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
+ Accesses.getDependenciesToCheck(),
+ Accesses.getUnderlyingObjects());
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
@@ -2708,7 +2674,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
recordAnalysis("CantCheckMemDepsAtRunTime", I)
<< "cannot check memory dependencies at runtime";
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
- return CantVec;
+ return false;
}
DepsAreSafe = true;
}
@@ -2719,7 +2685,7 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
<< "cannot add control dependency to convergent operation";
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
"would be needed with a convergent operation\n");
- return CantVec;
+ return false;
}
if (DepsAreSafe) {
@@ -2727,11 +2693,11 @@ LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
- return Histograms.empty() ? NormalVec : HistogramVec;
+ return true;
}
emitUnsafeDependenceRemark();
- return CantVec;
+ return false;
}
void LoopAccessInfo::emitUnsafeDependenceRemark() {
@@ -2791,9 +2757,6 @@ void LoopAccessInfo::emitUnsafeDependenceRemark() {
case MemoryDepChecker::Dependence::Unknown:
R << "\nUnknown data dependence.";
break;
- case MemoryDepChecker::Dependence::Histogram:
- R << "\nHistogram data dependence.";
- break;
}
if (Instruction *I = Dep.getSource(getDepChecker())) {
@@ -3065,7 +3028,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
}
void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
- if (CanVecMem != CantVec) {
+ if (CanVecMem) {
OS.indent(Depth) << "Memory dependences are safe";
const MemoryDepChecker &DC = getDepChecker();
if (!DC.isSafeForAnyVectorWidth())
@@ -3138,79 +3101,6 @@ void LoopAccessInfoManager::clear() {
LoopAccessInfoMap.erase(L);
}
-void AccessAnalysis::findHistograms(
- StoreInst *HSt, Loop *TheLoop, SmallVectorImpl<HistogramInfo> &Histograms,
- SmallPtrSetImpl<const Value *> &HistogramPtrs) {
-
- // Store value must come from a Binary Operation.
- Instruction *HPtrInstr = nullptr;
- BinaryOperator *HBinOp = nullptr;
- if (!match(HSt, m_Store(m_BinOp(HBinOp), m_Instruction(HPtrInstr))))
- return;
-
- // BinOp must be an Add or a Sub modifying the bucket value by a
- // loop invariant amount.
- // FIXME: We assume the loop invariant term is on the RHS.
- // Fine for an immediate/constant, but maybe not a generic value?
- Value *HIncVal = nullptr;
- if (!match(HBinOp, m_Add(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))) &&
- !match(HBinOp, m_Sub(m_Load(m_Specific(HPtrInstr)), m_Value(HIncVal))))
- return;
-
- // Make sure the increment value is loop invariant.
- if (!TheLoop->isLoopInvariant(HIncVal))
- return;
-
- // The address to store is calculated through a GEP Instruction.
- // FIXME: Support GEPs with more operands.
- GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
- if (!HPtr || HPtr->getNumOperands() > 2)
- return;
-
- // Check that the index is calculated by loading from another array. Ignore
- // any extensions.
- // FIXME: Support indices from other sources that a linear load from memory?
- Value *HIdx = HPtr->getOperand(1);
- Instruction *IdxInst = nullptr;
- if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Instruction(IdxInst))))
- return;
-
- // Currently restricting this to linear addressing when loading indices.
- LoadInst *VLoad = dyn_cas...
[truncated]
|
llvm#98493) Reverts llvm#91458 to deal with post-commit reviewer requests.
Reverts #91458 to deal with post-commit reviewer requests.