
Commit a00938e

Revert "[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop (#96752)" (#123057)

This reverts commit bfedf64.

1 parent: 6ca560a

5 files changed: +266 −188 lines
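For context, the reverted change (#96752) extended isDereferenceableAndAlignedInLoop to loops whose pointer steps downward through memory. As a minimal sketch (hypothetical function, not taken from the patch or its tests), this is the shape of loop involved:

```cpp
// Hypothetical reverse loop: the load starts at p[n - 1] and strides by a
// negative step each iteration, the access pattern the reverted patch tried
// to prove dereferenceable. In SCEV terms the address is roughly
// {p + 4*(n - 1),+,-4} for an i32 element.
int sumReverse(const int *p, int n) {
  int Sum = 0;
  for (int i = n - 1; i >= 0; --i)
    Sum += p[i];
  return Sum;
}
```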

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 0 additions & 19 deletions
```diff
@@ -853,25 +853,6 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
 bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                          ScalarEvolution &SE, bool CheckType = true);
 
-/// Calculate Start and End points of memory access.
-/// Let's assume A is the first access and B is a memory access on N-th loop
-/// iteration. Then B is calculated as:
-///   B = A + Step*N .
-/// Step value may be positive or negative.
-/// N is a calculated back-edge taken count:
-///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
-/// Start and End points are calculated in the following way:
-/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
-/// where SizeOfElt is the size of single memory access in bytes.
-///
-/// There is no conflict when the intervals are disjoint:
-/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
-    ScalarEvolution *SE,
-    DenseMap<std::pair<const SCEV *, Type *>,
-             std::pair<const SCEV *, const SCEV *>> *PointerBounds);
-
 class LoopAccessInfoManager {
   /// The cache.
   DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
```
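The deleted comment's interval arithmetic is easiest to check with concrete numbers. Below is a standalone model (assumed values, plain integers standing in for SCEV expressions) of B = A + Step*N, Start = UMIN(A, B), End = UMAX(A, B) + SizeOfElt for a negative step:

```cpp
#include <algorithm>
#include <cstdint>

int main() {
  const uint64_t A = 0x1000;    // first access
  const int64_t Step = -4;      // negative step: a reverse i32 loop
  const uint64_t N = 7;         // back-edge taken count
  const uint64_t SizeOfElt = 4; // bytes per access

  // B = A + Step*N; unsigned wraparound gives the same result as the
  // signed math: 0x1000 - 28 = 0xFE4.
  const uint64_t B = A + static_cast<uint64_t>(Step * int64_t(N));
  const uint64_t Start = std::min(A, B);           // UMIN(A, B)   = 0xFE4
  const uint64_t End = std::max(A, B) + SizeOfElt; // UMAX(A, B)+4 = 0x1004
  return Start < End ? 0 : 1; // the access covers [0xFE4, 0x1004)
}
```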

llvm/lib/Analysis/Loads.cpp

Lines changed: 52 additions & 57 deletions
```diff
@@ -13,7 +13,6 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
-#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -276,88 +275,84 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 bool llvm::isDereferenceableAndAlignedInLoop(
     LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
-  const Align Alignment = LI->getAlign();
   auto &DL = LI->getDataLayout();
   Value *Ptr = LI->getPointerOperand();
+
   APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
                 DL.getTypeStoreSize(LI->getType()).getFixedValue());
+  const Align Alignment = LI->getAlign();
+
+  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
 
   // If given a uniform (i.e. non-varying) address, see if we can prove the
   // access is safe within the loop w/o needing predication.
   if (L->isLoopInvariant(Ptr))
-    return isDereferenceableAndAlignedPointer(
-        Ptr, Alignment, EltSize, DL, L->getHeader()->getFirstNonPHI(), AC, &DT);
-
-  const SCEV *PtrScev = SE.getSCEV(Ptr);
-  auto *AddRec = dyn_cast<SCEVAddRecExpr>(PtrScev);
+    return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
+                                              HeaderFirstNonPHI, AC, &DT);
 
-  // Check to see if we have a repeating access pattern and it's possible
-  // to prove all accesses are well aligned.
+  // Otherwise, check to see if we have a repeating access pattern where we can
+  // prove that all accesses are well aligned and dereferenceable.
+  auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
   if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
     return false;
-
   auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
   if (!Step)
     return false;
 
-  // For the moment, restrict ourselves to the case where the access size is a
-  // multiple of the requested alignment and the base is aligned.
-  // TODO: generalize if a case found which warrants
-  if (EltSize.urem(Alignment.value()) != 0)
+  auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
+  if (!TC)
     return false;
 
   // TODO: Handle overlapping accesses.
-  if (EltSize.ugt(Step->getAPInt().abs()))
-    return false;
-
-  const SCEV *MaxBECount =
-      SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates);
-  if (isa<SCEVCouldNotCompute>(MaxBECount))
-    return false;
-
-  const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
-      L, PtrScev, LI->getType(), MaxBECount, &SE, nullptr);
-  if (isa<SCEVCouldNotCompute>(AccessStart) ||
-      isa<SCEVCouldNotCompute>(AccessEnd))
+  // We should be computing AccessSize as (TC - 1) * Step + EltSize.
+  if (EltSize.sgt(Step->getAPInt()))
     return false;
 
-  // Try to get the access size.
-  const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
-  APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
+  // Compute the total access size for access patterns with unit stride and
+  // patterns with gaps. For patterns with unit stride, Step and EltSize are the
+  // same.
+  // For patterns with gaps (i.e. non unit stride), we are
+  // accessing EltSize bytes at every Step.
+  APInt AccessSize = TC * Step->getAPInt();
 
+  assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
+         "implied by addrec definition");
   Value *Base = nullptr;
-  APInt AccessSize;
-  if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
-    Base = NewBase->getValue();
-    AccessSize = MaxPtrDiff;
-  } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
-    if (MinAdd->getNumOperands() != 2)
-      return false;
-
-    const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
-    const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
-    if (!Offset || !NewBase)
-      return false;
-
-    // The following code below assumes the offset is unsigned, but GEP
-    // offsets are treated as signed so we can end up with a signed value
-    // here too. For example, suppose the initial PHI value is (i8 255),
-    // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
-    if (Offset->getAPInt().isNegative())
-      return false;
+  if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
+    Base = StartS->getValue();
+  } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
+    // Handle (NewBase + offset) as start value.
+    const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
+    const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
+    if (StartS->getNumOperands() == 2 && Offset && NewBase) {
+      // The following code below assumes the offset is unsigned, but GEP
+      // offsets are treated as signed so we can end up with a signed value
+      // here too. For example, suppose the initial PHI value is (i8 255),
+      // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
+      if (Offset->getAPInt().isNegative())
+        return false;
 
-    // For the moment, restrict ourselves to the case where the offset is a
-    // multiple of the requested alignment and the base is aligned.
-    // TODO: generalize if a case found which warrants
-    if (Offset->getAPInt().urem(Alignment.value()) != 0)
-      return false;
+      // For the moment, restrict ourselves to the case where the offset is a
+      // multiple of the requested alignment and the base is aligned.
+      // TODO: generalize if a case found which warrants
+      if (Offset->getAPInt().urem(Alignment.value()) != 0)
+        return false;
+      Base = NewBase->getValue();
+      bool Overflow = false;
+      AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
+      if (Overflow)
+        return false;
+    }
+  }
 
-    AccessSize = MaxPtrDiff + Offset->getAPInt();
-    Base = NewBase->getValue();
-  } else
+  if (!Base)
     return false;
 
-  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
+  // For the moment, restrict ourselves to the case where the access size is a
+  // multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case found which warrants
+  if (EltSize.urem(Alignment.value()) != 0)
+    return false;
   return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
                                             HeaderFirstNonPHI, AC, &DT);
 }
```
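The restored AccessSize logic above can be modeled with plain integers. A minimal sketch (hypothetical helper, uint64_t standing in for llvm::APInt) of AccessSize = TC * Step plus a non-negative constant start offset, with the uadd_ov-style overflow bailout and the alignment restrictions; as the restored comment notes, (TC - 1) * Step + EltSize would be the tighter bound:

```cpp
#include <cstdint>
#include <optional>

// Returns std::nullopt wherever the real code returns false.
std::optional<uint64_t> modelAccessSize(uint64_t TC, uint64_t Step,
                                        uint64_t EltSize, uint64_t Offset,
                                        uint64_t Alignment) {
  if (TC == 0 || EltSize > Step) // mirrors the !TC and EltSize-vs-Step bailouts
    return std::nullopt;
  uint64_t AccessSize = TC * Step; // restored TODO: (TC - 1) * Step + EltSize
  uint64_t Total = AccessSize + Offset;
  if (Total < AccessSize) // unsigned wraparound, i.e. uadd_ov overflow
    return std::nullopt;
  // Offset and element size must be multiples of the requested alignment.
  if (Offset % Alignment != 0 || EltSize % Alignment != 0)
    return std::nullopt;
  return Total;
}
```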

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 35 additions & 26 deletions
```diff
@@ -190,29 +190,42 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
   Members.push_back(Index);
 }
 
-std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
-    ScalarEvolution *SE,
+/// Calculate Start and End points of memory access.
+/// Let's assume A is the first access and B is a memory access on N-th loop
+/// iteration. Then B is calculated as:
+///   B = A + Step*N .
+/// Step value may be positive or negative.
+/// N is a calculated back-edge taken count:
+///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
+/// Start and End points are calculated in the following way:
+/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
+/// where SizeOfElt is the size of single memory access in bytes.
+///
+/// There is no conflict when the intervals are disjoint:
+/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
+static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
+    PredicatedScalarEvolution &PSE,
     DenseMap<std::pair<const SCEV *, Type *>,
-             std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
-  std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
-  if (PointerBounds) {
-    auto [Iter, Ins] = PointerBounds->insert(
-        {{PtrExpr, AccessTy},
-         {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
-    if (!Ins)
-      return Iter->second;
-    PtrBoundsPair = &Iter->second;
-  }
+             std::pair<const SCEV *, const SCEV *>> &PointerBounds) {
+  ScalarEvolution *SE = PSE.getSE();
+
+  auto [Iter, Ins] = PointerBounds.insert(
+      {{PtrExpr, AccessTy},
+       {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
+  if (!Ins)
+    return Iter->second;
 
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
   if (SE->isLoopInvariant(PtrExpr, Lp)) {
     ScStart = ScEnd = PtrExpr;
   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
+    const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();
+
     ScStart = AR->getStart();
-    ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
+    ScEnd = AR->evaluateAtIteration(Ex, *SE);
     const SCEV *Step = AR->getStepRecurrence(*SE);
 
     // For expressions with negative step, the upper bound is ScStart and the
@@ -231,18 +244,16 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
     return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
 
   assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
-  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
+  assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
 
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getDataLayout();
   Type *IdxTy = DL.getIndexType(PtrExpr->getType());
   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
-  if (PointerBounds)
-    *PtrBoundsPair = Res;
-  return Res;
+  Iter->second = {ScStart, ScEnd};
+  return Iter->second;
 }
 
 /// Calculate Start and End points of memory access using
```
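Besides threading PSE back through, the refactor makes the PointerBounds cache mandatory (a reference instead of an optional pointer) and fills it in place. A minimal sketch of that insert-a-sentinel-first memoization idiom, with std::map standing in for llvm::DenseMap and int keys standing in for (SCEV, Type) pairs:

```cpp
#include <map>
#include <utility>

using Bounds = std::pair<long, long>;
// Stand-in for {SE->getCouldNotCompute(), SE->getCouldNotCompute()}.
constexpr Bounds CouldNotCompute{-1, -1};

Bounds getBoundsCached(int Key, std::map<int, Bounds> &Cache) {
  // Insert the sentinel up front; a repeat lookup returns whatever is cached.
  auto [Iter, Ins] = Cache.insert({Key, CouldNotCompute});
  if (!Ins)
    return Iter->second;
  Iter->second = {Key * 10, Key * 10 + 4}; // stand-in for the real analysis
  return Iter->second;
}
```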
```diff
@@ -252,9 +263,8 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                     unsigned DepSetId, unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
-  const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
-      Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
+      Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1928,11 +1938,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // required for correctness.
   if (SE.isLoopInvariant(Src, InnermostLoop) ||
       SE.isLoopInvariant(Sink, InnermostLoop)) {
-    const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
-    const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
-        InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
-    const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
-        InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
+    const auto &[SrcStart_, SrcEnd_] =
+        getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
+    const auto &[SinkStart_, SinkEnd_] =
+        getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
         !isa<SCEVCouldNotCompute>(SinkStart_) &&
```