
Commit 15a22e8

[LoopVectorize] Add support for reverse loops in isDereferenceableAndAlignedInLoop
Currently, when we encounter a negative step in the induction variable, isDereferenceableAndAlignedInLoop bails out because the element size is signed-greater-than the step. This patch adds support for negative steps in cases where we detect that the start address of the load has the form base + offset. In this case the address decrements on each iteration, so the access size has to be calculated differently. I have done this by calling getStartAndEndForAccess from LoopAccessAnalysis.cpp.

The changed test in LoopVectorize/X86/load-deref-pred.ll now passes because we previously calculated the total access size incorrectly; it is now 412 bytes and fits exactly into the alloca.
Parent: 7802fb5 · Commit: 15a22e8
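For illustration, a hypothetical reverse loop of the kind this patch targets (a sketch, not the test from the commit): on a typical target where int is 4 bytes, the load address forms the affine recurrence {base + 4*(n - 1),+,-4}, so the region accessed over the whole loop is [base, base + 4*n) even though the step is negative.

int sum_reverse(const int *base, unsigned n) {
  int sum = 0;
  // The address of base[i] starts at the last element and moves down by
  // one element (4 bytes) per iteration, i.e. a negative-step recurrence.
  for (unsigned i = n; i-- > 0;)
    sum += base[i];
  return sum;
}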

File tree: 5 files changed, 183 insertions(+), 261 deletions(-)


llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 9 additions & 0 deletions
@@ -843,6 +843,15 @@ bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
 bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                          ScalarEvolution &SE, bool CheckType = true);
 
+/// For a given Loop \p Lp and pointer \p PtrExpr return a pair of SCEV values
+/// representing the maximum range of addresses accessed in the loop, i.e.
+/// [min,max).
+std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
+    ScalarEvolution *SE,
+    DenseMap<std::pair<const SCEV *, Type *>,
+             std::pair<const SCEV *, const SCEV *>> *PointerBounds);
+
 class LoopAccessInfoManager {
   /// The cache.
   DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
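The PointerBounds cache parameter is now a pointer so that callers outside LoopAccessAnalysis can opt out of memoization. A minimal sketch of the two calling styles, assuming the surrounding variables from the call sites changed below:

// Inside LoopAccessAnalysis: reuse the per-analysis bounds cache.
const auto &[Start, End] = getStartAndEndForAccess(
    Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &PointerBounds);

// From Loads.cpp, where no cache exists: pass nullptr and recompute the
// bounds on each query.
const auto &[Start2, End2] = getStartAndEndForAccess(
    L, Ptr, LI->getType(), MaxBECount, &SE, /*PointerBounds=*/nullptr);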

llvm/lib/Analysis/Loads.cpp

Lines changed: 62 additions & 60 deletions
@@ -13,6 +13,7 @@
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -276,84 +277,85 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
 bool llvm::isDereferenceableAndAlignedInLoop(
     LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     AssumptionCache *AC, SmallVectorImpl<const SCEVPredicate *> *Predicates) {
-  auto &DL = LI->getDataLayout();
-  Value *Ptr = LI->getPointerOperand();
-
-  APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
-                DL.getTypeStoreSize(LI->getType()).getFixedValue());
-  const Align Alignment = LI->getAlign();
+  const SCEV *Ptr = SE.getSCEV(LI->getPointerOperand());
+  auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ptr);
 
-  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
-
-  // If given a uniform (i.e. non-varying) address, see if we can prove the
-  // access is safe within the loop w/o needing predication.
-  if (L->isLoopInvariant(Ptr))
-    return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
-                                              HeaderFirstNonPHI, AC, &DT);
-
-  // Otherwise, check to see if we have a repeating access pattern where we can
-  // prove that all accesses are well aligned and dereferenceable.
-  auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
+  // Check to see if we have a repeating access pattern and it's possible
+  // to prove all accesses are well aligned.
   if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
     return false;
+
   auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
   if (!Step)
     return false;
 
-  auto TC = SE.getSmallConstantMaxTripCount(L, Predicates);
-  if (!TC)
+  // For the moment, restrict ourselves to the case where the access size is a
+  // multiple of the requested alignment and the base is aligned.
+  // TODO: generalize if a case found which warrants
+  const Align Alignment = LI->getAlign();
+  auto &DL = LI->getDataLayout();
+  APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
+                DL.getTypeStoreSize(LI->getType()).getFixedValue());
+  if (EltSize.urem(Alignment.value()) != 0)
    return false;
 
   // TODO: Handle overlapping accesses.
-  // We should be computing AccessSize as (TC - 1) * Step + EltSize.
-  if (EltSize.sgt(Step->getAPInt()))
+  if (EltSize.ugt(Step->getAPInt().abs()))
     return false;
 
-  // Compute the total access size for access patterns with unit stride and
-  // patterns with gaps. For patterns with unit stride, Step and EltSize are the
-  // same.
-  // For patterns with gaps (i.e. non unit stride), we are
-  // accessing EltSize bytes at every Step.
-  APInt AccessSize = TC * Step->getAPInt();
+  const SCEV *MaxBECount =
+      SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates);
+  if (isa<SCEVCouldNotCompute>(MaxBECount))
+    return false;
 
-  assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
-         "implied by addrec definition");
-  Value *Base = nullptr;
-  if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
-    Base = StartS->getValue();
-  } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
-    // Handle (NewBase + offset) as start value.
-    const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
-    const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
-    if (StartS->getNumOperands() == 2 && Offset && NewBase) {
-      // The following code below assumes the offset is unsigned, but GEP
-      // offsets are treated as signed so we can end up with a signed value
-      // here too. For example, suppose the initial PHI value is (i8 255),
-      // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
-      if (Offset->getAPInt().isNegative())
-        return false;
+  const auto &[AccessStart, AccessEnd] =
+      getStartAndEndForAccess(L, Ptr, LI->getType(), MaxBECount, &SE, nullptr);
+  if (isa<SCEVCouldNotCompute>(AccessStart) ||
+      isa<SCEVCouldNotCompute>(AccessEnd))
+    return false;
 
-      // For the moment, restrict ourselves to the case where the offset is a
-      // multiple of the requested alignment and the base is aligned.
-      // TODO: generalize if a case found which warrants
-      if (Offset->getAPInt().urem(Alignment.value()) != 0)
-        return false;
-      Base = NewBase->getValue();
-      bool Overflow = false;
-      AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
-      if (Overflow)
-        return false;
-    }
-  }
+  // Try to get the access size.
+  const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
+  APInt MaxPtrDiff = SE.getUnsignedRangeMax(PtrDiff);
 
-  if (!Base)
+  // If the (max) pointer difference is > 32 bits then it's unlikely to be
+  // dereferenceable.
+  if (MaxPtrDiff.getActiveBits() > 32)
     return false;
 
-  // For the moment, restrict ourselves to the case where the access size is a
-  // multiple of the requested alignment and the base is aligned.
-  // TODO: generalize if a case found which warrants
-  if (EltSize.urem(Alignment.value()) != 0)
+  Value *Base = nullptr;
+  APInt AccessSize;
+  if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
+    Base = NewBase->getValue();
+    AccessSize = MaxPtrDiff;
+  } else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
+    if (MinAdd->getNumOperands() != 2)
+      return false;
+
+    const auto *Offset = dyn_cast<SCEVConstant>(MinAdd->getOperand(0));
+    const auto *NewBase = dyn_cast<SCEVUnknown>(MinAdd->getOperand(1));
+    if (!Offset || !NewBase)
+      return false;
+
+    // The following code below assumes the offset is unsigned, but GEP
+    // offsets are treated as signed so we can end up with a signed value
+    // here too. For example, suppose the initial PHI value is (i8 255),
+    // the offset will be treated as (i8 -1) and sign-extended to (i64 -1).
+    if (Offset->getAPInt().isNegative())
+      return false;
+
+    // For the moment, restrict ourselves to the case where the offset is a
+    // multiple of the requested alignment and the base is aligned.
+    // TODO: generalize if a case found which warrants
+    if (Offset->getAPInt().urem(Alignment.value()) != 0)
+      return false;
+
+    AccessSize = MaxPtrDiff + Offset->getAPInt();
+    Base = NewBase->getValue();
+  } else
     return false;
+
+  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();
   return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
                                             HeaderFirstNonPHI, AC, &DT);
 }
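As a sanity check on the new size computation (a sketch based on the interval getStartAndEndForAccess returns, writing s for the constant step and m for the maximum backedge-taken count), the bounds swap for a negative step:

  AccessStart = Start + s * m                              (s < 0)
  AccessEnd   = Start + EltSize
  PtrDiff     = AccessEnd - AccessStart = |s| * m + EltSize

When AccessStart has the form (Offset + Base), dereferenceability is proven from Base, so the size is widened to MaxPtrDiff + Offset and the checked range [Base, Base + AccessSize) covers every byte the loop can touch. One parameter choice consistent with the 412 bytes quoted in the commit message (an assumption, since the test body is not shown here) is i32 elements with s = -4 and m = 102: 4 * 102 + 4 = 412.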

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 26 additions & 22 deletions
@@ -203,29 +203,29 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
 ///
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
-    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy,
-    PredicatedScalarEvolution &PSE,
+std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
+    const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
+    ScalarEvolution *SE,
     DenseMap<std::pair<const SCEV *, Type *>,
-             std::pair<const SCEV *, const SCEV *>> &PointerBounds) {
-  ScalarEvolution *SE = PSE.getSE();
-
-  auto [Iter, Ins] = PointerBounds.insert(
-      {{PtrExpr, AccessTy},
-       {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
-  if (!Ins)
-    return Iter->second;
+             std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
+  std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
+  if (PointerBounds) {
+    auto [Iter, Ins] = PointerBounds->insert(
+        {{PtrExpr, AccessTy},
+         {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
+    if (!Ins)
+      return Iter->second;
+    PtrBoundsPair = &Iter->second;
+  }
 
   const SCEV *ScStart;
   const SCEV *ScEnd;
 
   if (SE->isLoopInvariant(PtrExpr, Lp)) {
     ScStart = ScEnd = PtrExpr;
   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
-    const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount();
-
     ScStart = AR->getStart();
-    ScEnd = AR->evaluateAtIteration(Ex, *SE);
+    ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
     const SCEV *Step = AR->getStepRecurrence(*SE);
 
     // For expressions with negative step, the upper bound is ScStart and the
@@ -244,16 +244,18 @@ static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess(
     return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
 
   assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
-  assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant");
+  assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
 
   // Add the size of the pointed element to ScEnd.
   auto &DL = Lp->getHeader()->getDataLayout();
   Type *IdxTy = DL.getIndexType(PtrExpr->getType());
   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
 
-  Iter->second = {ScStart, ScEnd};
-  return Iter->second;
+  std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
+  if (PointerBounds)
+    *PtrBoundsPair = Res;
+  return Res;
 }
 
 /// Calculate Start and End points of memory access using
@@ -263,8 +265,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
                                     unsigned DepSetId, unsigned ASId,
                                     PredicatedScalarEvolution &PSE,
                                     bool NeedsFreeze) {
+  const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
-      Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds());
+      Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
          !isa<SCEVCouldNotCompute>(ScEnd) &&
          "must be able to compute both start and end expressions");
@@ -1937,10 +1940,11 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
   // required for correctness.
   if (SE.isLoopInvariant(Src, InnermostLoop) ||
       SE.isLoopInvariant(Sink, InnermostLoop)) {
-    const auto &[SrcStart_, SrcEnd_] =
-        getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
-    const auto &[SinkStart_, SinkEnd_] =
-        getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
+    const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
+    const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
+        InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
+    const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
+        InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
        !isa<SCEVCouldNotCompute>(SinkStart_) &&