Skip to content

Commit 5f8cc0c

Browse files
author
Elena Demikhovsky
committed
[Loop Vectorizer] Consecutive memory access - fixed and simplified
Amended the consecutive memory access detection in the Loop Vectorizer. Loads and stores whose pointer operand was not produced by a preceding GEP instruction were not handled properly. Differential Revision: https://reviews.llvm.org/D20789. llvm-svn: 281853
1 parent 6c21e6a commit 5f8cc0c

File tree

4 files changed

+53
-86
lines changed

4 files changed

+53
-86
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,7 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
713713
/// run-time assumptions.
714714
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
715715
const ValueToValueMap &StridesMap = ValueToValueMap(),
716-
bool Assume = false);
716+
bool Assume = false, bool ShouldCheckWrap = true);
717717

718718
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
719719
/// This is a simple API that does not depend on the analysis pass.

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -887,7 +887,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
887887
/// \brief Check whether the access through \p Ptr has a constant stride.
888888
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
889889
const Loop *Lp, const ValueToValueMap &StridesMap,
890-
bool Assume) {
890+
bool Assume, bool ShouldCheckWrap) {
891891
Type *Ty = Ptr->getType();
892892
assert(Ty->isPointerTy() && "Unexpected non-ptr");
893893

@@ -926,9 +926,9 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
926926
// to access the pointer value "0" which is undefined behavior in address
927927
// space 0, therefore we can also vectorize this case.
928928
bool IsInBoundsGEP = isInBoundsGep(Ptr);
929-
bool IsNoWrapAddRec =
930-
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
931-
isNoWrapAddRec(Ptr, AR, PSE, Lp);
929+
bool IsNoWrapAddRec = !ShouldCheckWrap ||
930+
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
931+
isNoWrapAddRec(Ptr, AR, PSE, Lp);
932932
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
933933
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
934934
if (Assume) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2296,87 +2296,13 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
22962296
}
22972297

22982298
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
2299-
assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
2300-
auto *SE = PSE.getSE();
2301-
// Make sure that the pointer does not point to structs.
2302-
if (Ptr->getType()->getPointerElementType()->isAggregateType())
2303-
return 0;
2304-
2305-
// If this value is a pointer induction variable, we know it is consecutive.
2306-
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
2307-
if (Phi && Inductions.count(Phi)) {
2308-
InductionDescriptor II = Inductions[Phi];
2309-
return II.getConsecutiveDirection();
2310-
}
2311-
2312-
GetElementPtrInst *Gep = getGEPInstruction(Ptr);
2313-
if (!Gep)
2314-
return 0;
2315-
2316-
unsigned NumOperands = Gep->getNumOperands();
2317-
Value *GpPtr = Gep->getPointerOperand();
2318-
// If this GEP value is a consecutive pointer induction variable and all of
2319-
// the indices are constant, then we know it is consecutive.
2320-
Phi = dyn_cast<PHINode>(GpPtr);
2321-
if (Phi && Inductions.count(Phi)) {
2322-
2323-
// Make sure that the pointer does not point to structs.
2324-
PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
2325-
if (GepPtrType->getElementType()->isAggregateType())
2326-
return 0;
2327-
2328-
// Make sure that all of the index operands are loop invariant.
2329-
for (unsigned i = 1; i < NumOperands; ++i)
2330-
if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
2331-
return 0;
23322299

2333-
InductionDescriptor II = Inductions[Phi];
2334-
return II.getConsecutiveDirection();
2335-
}
2336-
2337-
unsigned InductionOperand = getGEPInductionOperand(Gep);
2338-
2339-
// Check that all of the gep indices are uniform except for our induction
2340-
// operand.
2341-
for (unsigned i = 0; i != NumOperands; ++i)
2342-
if (i != InductionOperand &&
2343-
!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
2344-
return 0;
2345-
2346-
// We can emit wide load/stores only if the last non-zero index is the
2347-
// induction variable.
2348-
const SCEV *Last = nullptr;
2349-
if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
2350-
Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
2351-
else {
2352-
// Because of the multiplication by a stride we can have a s/zext cast.
2353-
// We are going to replace this stride by 1 so the cast is safe to ignore.
2354-
//
2355-
// %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2356-
// %0 = trunc i64 %indvars.iv to i32
2357-
// %mul = mul i32 %0, %Stride1
2358-
// %idxprom = zext i32 %mul to i64 << Safe cast.
2359-
// %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
2360-
//
2361-
Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
2362-
Gep->getOperand(InductionOperand), Gep);
2363-
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
2364-
Last =
2365-
(C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
2366-
? C->getOperand()
2367-
: Last;
2368-
}
2369-
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
2370-
const SCEV *Step = AR->getStepRecurrence(*SE);
2371-
2372-
// The memory is consecutive because the last index is consecutive
2373-
// and all other indices are loop invariant.
2374-
if (Step->isOne())
2375-
return 1;
2376-
if (Step->isAllOnesValue())
2377-
return -1;
2378-
}
2300+
const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() :
2301+
ValueToValueMap();
23792302

2303+
int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
2304+
if (Stride == 1 || Stride == -1)
2305+
return Stride;
23802306
return 0;
23812307
}
23822308

@@ -2813,8 +2739,6 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
28132739
Ptr = Builder.Insert(Gep2);
28142740

28152741
} else { // No GEP
2816-
// Use the induction element ptr.
2817-
assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
28182742
setDebugLocFromInst(Builder, Ptr);
28192743
Ptr = getScalarValue(Ptr, 0, 0);
28202744
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
;RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s
2+
3+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-unknown-linux-gnu"
5+
6+
;; Check consecutive memory access without preceding GEP instruction
7+
8+
; for (int i=0; i<len; i++) {
9+
; *to++ = *from++;
10+
; }
11+
12+
; CHECK-LABEL: @consecutive_no_gep(
13+
; CHECK: vector.body
14+
; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
15+
; CHECK: getelementptr float, float* %{{.*}}, i64 %[[index]]
16+
; CHECK: load <4 x float>
17+
18+
define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 {
19+
entry:
20+
%cmp2 = icmp sgt i32 %len, 0
21+
br i1 %cmp2, label %for.body.preheader, label %for.end
22+
23+
for.body.preheader: ; preds = %entry
24+
br label %for.body
25+
26+
for.body: ; preds = %for.body.preheader, %for.body
27+
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
28+
%from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ]
29+
%to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ]
30+
%incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1
31+
%val = load float, float* %from.addr.04, align 4
32+
%incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1
33+
store float %val, float* %to.addr.03, align 4
34+
%inc = add nsw i32 %i.05, 1
35+
%cmp = icmp slt i32 %inc, %len
36+
br i1 %cmp, label %for.body, label %for.end.loopexit
37+
38+
for.end.loopexit: ; preds = %for.body
39+
br label %for.end
40+
41+
for.end: ; preds = %for.end.loopexit, %entry
42+
ret void
43+
}

0 commit comments

Comments
 (0)