-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop #96752
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
#include "llvm/Analysis/Loads.h" | ||
#include "llvm/Analysis/AliasAnalysis.h" | ||
#include "llvm/Analysis/AssumeBundleQueries.h" | ||
#include "llvm/Analysis/LoopAccessAnalysis.h" | ||
#include "llvm/Analysis/LoopInfo.h" | ||
#include "llvm/Analysis/MemoryBuiltins.h" | ||
#include "llvm/Analysis/MemoryLocation.h" | ||
|
@@ -275,84 +276,88 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { | |
bool llvm::isDereferenceableAndAlignedInLoop( | ||
LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, | ||
AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) { | ||
const Align Alignment = LI->getAlign(); | ||
auto &DL = LI->getDataLayout(); | ||
Value *Ptr = LI->getPointerOperand(); | ||
|
||
APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()), | ||
DL.getTypeStoreSize(LI->getType()).getFixedValue()); | ||
const Align Alignment = LI->getAlign(); | ||
|
||
Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); | ||
|
||
// If given a uniform (i.e. non-varying) address, see if we can prove the | ||
// access is safe within the loop w/o needing predication. | ||
if (L->isLoopInvariant(Ptr)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like this code path was previously not tested and dropping it may cause a regression. I added 747f7f3 which should cover this codepath. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No problem. Thanks for adding the test! I've re-added the code now. |
||
return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL, | ||
HeaderFirstNonPHI, AC, &DT); | ||
return isDereferenceableAndAlignedPointer( | ||
Ptr, Alignment, EltSize, DL, L->getHeader()->getFirstNonPHI(), AC, &DT); | ||
|
||
const SCEV *PtrScev = SE.getSCEV(Ptr); | ||
auto *AddRec = dyn_cast<SCEVAddRecExpr>(PtrScev); | ||
|
||
// Otherwise, check to see if we have a repeating access pattern where we can | ||
// prove that all accesses are well aligned and dereferenceable. | ||
auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr)); | ||
// Check to see if we have a repeating access pattern and it's possible | ||
// to prove all accesses are well aligned. | ||
if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine()) | ||
return false; | ||
|
||
auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE)); | ||
if (!Step) | ||
return false; | ||
|
||
auto TC = SE.getSmallConstantMaxTripCount(L, Predicates); | ||
if (!TC) | ||
// For the moment, restrict ourselves to the case where the access size is a | ||
// multiple of the requested alignment and the base is aligned. | ||
// TODO: generalize if a case found which warrants | ||
if (EltSize.urem(Alignment.value()) != 0) | ||
return false; | ||
|
||
// TODO: Handle overlapping accesses. | ||
// We should be computing AccessSize as (TC - 1) * Step + EltSize. | ||
if (EltSize.sgt(Step->getAPInt())) | ||
if (EltSize.ugt(Step->getAPInt().abs())) | ||
return false; | ||
|
||
const SCEV *MaxBECount = | ||
SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates); | ||
if (isa<SCEVCouldNotCompute>(MaxBECount)) | ||
return false; | ||
|
||
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess( | ||
L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr); | ||
if (isa<SCEVCouldNotCompute>(AccessStart) || | ||
isa<SCEVCouldNotCompute>(AccessEnd)) | ||
return false; | ||
|
||
// Compute the total access size for access patterns with unit stride and | ||
// patterns with gaps. For patterns with unit stride, Step and EltSize are the | ||
// same. | ||
// For patterns with gaps (i.e. non unit stride), we are | ||
// accessing EltSize bytes at every Step. | ||
APInt AccessSize = TC * Step->getAPInt(); | ||
// Try to get the access size. | ||
const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart); | ||
APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff); | ||
|
||
assert(SE.isLoopInvariant(AddRec->getStart(), L) && | ||
"implied by addrec definition"); | ||
Value *Base = nullptr; | ||
if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) { | ||
Base = StartS->getValue(); | ||
} else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) { | ||
// Handle (NewBase + offset) as start value. | ||
const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0)); | ||
const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1)); | ||
if (StartS->getNumOperands() == 2 && Offset && NewBase) { | ||
// The following code below assumes the offset is unsigned, but GEP | ||
// offsets are treated as signed so we can end up with a signed value | ||
// here too. For example, suppose the initial PHI value is (i8 255), | ||
// the offset will be treated as (i8 -1) and sign-extended to (i64 -1). | ||
if (Offset->getAPInt().isNegative()) | ||
return false; | ||
APInt AccessSize; | ||
if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) { | ||
Base = NewBase->getValue(); | ||
AccessSize = MaxPtrDiff; | ||
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) { | ||
if (MinAdd->getNumOperands() != 2) | ||
return false; | ||
|
||
// For the moment, restrict ourselves to the case where the offset is a | ||
// multiple of the requested alignment and the base is aligned. | ||
// TODO: generalize if a case found which warrants | ||
if (Offset->getAPInt().urem(Alignment.value()) != 0) | ||
return false; | ||
Base = NewBase->getValue(); | ||
bool Overflow = false; | ||
AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow); | ||
if (Overflow) | ||
return false; | ||
} | ||
} | ||
const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0)); | ||
const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1)); | ||
if (!Offset || !NewBase) | ||
return false; | ||
|
||
if (!Base) | ||
return false; | ||
// The following code below assumes the offset is unsigned, but GEP | ||
// offsets are treated as signed so we can end up with a signed value | ||
// here too. For example, suppose the initial PHI value is (i8 255), | ||
// the offset will be treated as (i8 -1) and sign-extended to (i64 -1). | ||
if (Offset->getAPInt().isNegative()) | ||
return false; | ||
|
||
// For the moment, restrict ourselves to the case where the access size is a | ||
// multiple of the requested alignment and the base is aligned. | ||
// TODO: generalize if a case found which warrants | ||
if (EltSize.urem(Alignment.value()) != 0) | ||
// For the moment, restrict ourselves to the case where the offset is a | ||
// multiple of the requested alignment and the base is aligned. | ||
// TODO: generalize if a case found which warrants | ||
if (Offset->getAPInt().urem(Alignment.value()) != 0) | ||
return false; | ||
|
||
AccessSize = MaxPtrDiff + Offset->getAPInt(); | ||
Base = NewBase->getValue(); | ||
} else | ||
return false; | ||
|
||
Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI(); | ||
return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL, | ||
HeaderFirstNonPHI, AC, &DT); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,42 +190,29 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( | |
Members.push_back(Index); | ||
} | ||
|
||
/// Calculate Start and End points of memory access. | ||
/// Let's assume A is the first access and B is a memory access on N-th loop | ||
/// iteration. Then B is calculated as: | ||
/// B = A + Step*N . | ||
/// Step value may be positive or negative. | ||
/// N is a calculated back-edge taken count: | ||
/// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0 | ||
/// Start and End points are calculated in the following way: | ||
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, | ||
/// where SizeOfElt is the size of single memory access in bytes. | ||
/// | ||
/// There is no conflict when the intervals are disjoint: | ||
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) | ||
static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( | ||
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, | ||
PredicatedScalarEvolution &PSE, | ||
std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might be good to add an overload without the extra EltSize argument for users here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount, | ||
ScalarEvolution *SE, | ||
DenseMap<std::pair<const SCEV *, Type *>, | ||
std::pair<const SCEV *, const SCEV *>> &PointerBounds) { | ||
ScalarEvolution *SE = PSE.getSE(); | ||
|
||
auto [Iter, Ins] = PointerBounds.insert( | ||
{{PtrExpr, AccessTy}, | ||
{SE->getCouldNotCompute(), SE->getCouldNotCompute()}}); | ||
if (!Ins) | ||
return Iter->second; | ||
std::pair<const SCEV *, const SCEV *>> *PointerBounds) { | ||
std::pair<const SCEV *, const SCEV *> *PtrBoundsPair; | ||
if (PointerBounds) { | ||
auto [Iter, Ins] = PointerBounds->insert( | ||
{{PtrExpr, AccessTy}, | ||
{SE->getCouldNotCompute(), SE->getCouldNotCompute()}}); | ||
if (!Ins) | ||
return Iter->second; | ||
PtrBoundsPair = &Iter->second; | ||
} | ||
|
||
const SCEV *ScStart; | ||
const SCEV *ScEnd; | ||
|
||
if (SE->isLoopInvariant(PtrExpr, Lp)) { | ||
ScStart = ScEnd = PtrExpr; | ||
} else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) { | ||
const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As PSE caches the max symbolic BTC (I think) it's probably better to avoid the extra parameter. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The extra parameter is required in order to reuse this function in Loads.cpp, because isDereferenceableAndAlignedInLoop only has access to ScalarEvolution. In the places where this is called from LoopAccessAnalysis.cpp we have access to a PredicatedScalarEvolution, so the caller can pass the cached max backedge-taken count explicitly. The alternative would be to have two versions of getStartAndEndForAccess — one taking a PredicatedScalarEvolution and one taking a ScalarEvolution plus an explicit max backedge-taken count. |
||
|
||
ScStart = AR->getStart(); | ||
ScEnd = AR->evaluateAtIteration(Ex, *SE); | ||
ScEnd = AR->evaluateAtIteration(MaxBECount, *SE); | ||
const SCEV *Step = AR->getStepRecurrence(*SE); | ||
|
||
// For expressions with negative step, the upper bound is ScStart and the | ||
|
@@ -244,16 +231,18 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( | |
return {SE->getCouldNotCompute(), SE->getCouldNotCompute()}; | ||
|
||
assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant"); | ||
assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant"); | ||
assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant"); | ||
|
||
// Add the size of the pointed element to ScEnd. | ||
auto &DL = Lp->getHeader()->getDataLayout(); | ||
Type *IdxTy = DL.getIndexType(PtrExpr->getType()); | ||
const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy); | ||
ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); | ||
|
||
Iter->second = {ScStart, ScEnd}; | ||
return Iter->second; | ||
std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd}; | ||
if (PointerBounds) | ||
*PtrBoundsPair = Res; | ||
return Res; | ||
} | ||
|
||
/// Calculate Start and End points of memory access using | ||
|
@@ -263,8 +252,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, | |
unsigned DepSetId, unsigned ASId, | ||
PredicatedScalarEvolution &PSE, | ||
bool NeedsFreeze) { | ||
const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
const auto &[ScStart, ScEnd] = getStartAndEndForAccess( | ||
Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds()); | ||
Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds()); | ||
assert(!isa<SCEVCouldNotCompute>(ScStart) && | ||
!isa<SCEVCouldNotCompute>(ScEnd) && | ||
"must be able to compute both start and end expressions"); | ||
|
@@ -1938,10 +1928,11 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( | |
// required for correctness. | ||
if (SE.isLoopInvariant(Src, InnermostLoop) || | ||
SE.isLoopInvariant(Sink, InnermostLoop)) { | ||
const auto &[SrcStart_, SrcEnd_] = | ||
getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds); | ||
const auto &[SinkStart_, SinkEnd_] = | ||
getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds); | ||
const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess( | ||
InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds); | ||
const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess( | ||
InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds); | ||
if (!isa<SCEVCouldNotCompute>(SrcStart_) && | ||
!isa<SCEVCouldNotCompute>(SrcEnd_) && | ||
!isa<SCEVCouldNotCompute>(SinkStart_) && | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you move the comment from
llvm/lib/Analysis/LoopAccessAnalysis.cpp
, as it is more precise? I think from the current description, it sounds like the returned value is the last address accessed in the loop, but unless I am missing something it is actually (address of last access) + access size.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!