Skip to content

[DA] handle memory accesses with different offsets and strides #123436

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
238bbca
[DA] Dependence analysis does not handle array accesses of different …
sebpop Nov 14, 2024
164a0fa
[DA] check memory offsets are multiples of elements size
sebpop Jan 17, 2025
a479bdf
fix auto type
sebpop Feb 26, 2025
9213c32
convert checkOffsets lambda to static function
sebpop Feb 26, 2025
caf4f8d
remove useless comment
sebpop Feb 26, 2025
468652d
add more comments and examples
sebpop Feb 26, 2025
108d224
add extra slash for function comments
sebpop Feb 27, 2025
c62e1f4
simplify logic with a single return
sebpop Feb 27, 2025
8dcc5a0
use auto on dyn_cast assign for readability
sebpop Feb 27, 2025
92f6b4f
simplify logic for similar code-paths
sebpop Feb 27, 2025
0eae7f0
move isKnownMultipleOf to ScalarEvolution
sebpop Feb 27, 2025
60ba4e6
record runtime assumptions for parametric expressions
sebpop Apr 8, 2025
8c69ebf
disable loop interchange, fusion, and unroll-and-jam on runtime assum…
sebpop Apr 9, 2025
a80c878
handle compile time "s % m != 0" in isKnownMultipleOf
sebpop Apr 14, 2025
bf9fcfd
add testcase from Ryotaro Kasuga's review
sebpop Apr 14, 2025
3dbba8e
record runtime predicates on each Dependence relation
sebpop Apr 14, 2025
d9846e9
add testcases
sebpop Apr 29, 2025
b954282
Revert "disable loop interchange, fusion, and unroll-and-jam on runti…
sebpop May 5, 2025
9a110df
turn off UnderRuntimeAssumptions in depends computation
sebpop May 5, 2025
297ff44
clang-format code around my changes
sebpop May 6, 2025
861ef01
update comments for last wave of reviews
sebpop May 13, 2025
4972776
also reject before or after pointer
sebpop May 13, 2025
338668f
use LocationSize.isPrecise
sebpop May 13, 2025
869b5ff
remove Assumptions.empty branch
sebpop May 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions llvm/include/llvm/Analysis/DependenceAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
Expand All @@ -49,8 +50,6 @@ namespace llvm {
template <typename T> class ArrayRef;
class Loop;
class LoopInfo;
class ScalarEvolution;
class SCEV;
class SCEVConstant;
class raw_ostream;

Expand All @@ -74,8 +73,9 @@ namespace llvm {
Dependence &operator=(Dependence &&) = default;

public:
Dependence(Instruction *Source, Instruction *Destination)
: Src(Source), Dst(Destination) {}
Dependence(Instruction *Source, Instruction *Destination,
const SCEVUnionPredicate &A)
: Src(Source), Dst(Destination), Assumptions(A) {}
virtual ~Dependence() = default;

/// Dependence::DVEntry - Each level in the distance/direction vector
Expand Down Expand Up @@ -203,6 +203,10 @@ namespace llvm {
/// field.
void setNextSuccessor(const Dependence *succ) { NextSuccessor = succ; }

/// getRuntimeAssumptions - Returns the predicates that have to hold at run
/// time for this Dependence relation to be valid.
SCEVUnionPredicate getRuntimeAssumptions() const {
  return Assumptions;
}

/// dump - For debugging purposes, dumps a dependence to OS.
///
void dump(raw_ostream &OS) const;
Expand All @@ -211,6 +215,7 @@ namespace llvm {
Instruction *Src, *Dst;

private:
SCEVUnionPredicate Assumptions;
const Dependence *NextPredecessor = nullptr, *NextSuccessor = nullptr;
friend class DependenceInfo;
};
Expand All @@ -225,8 +230,9 @@ namespace llvm {
/// input dependences are unordered.
class FullDependence final : public Dependence {
public:
FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent,
unsigned Levels);
FullDependence(Instruction *Source, Instruction *Destination,
const SCEVUnionPredicate &Assumes,
bool PossiblyLoopIndependent, unsigned Levels);

/// isLoopIndependent - Returns true if this is a loop-independent
/// dependence.
Expand Down Expand Up @@ -302,9 +308,13 @@ namespace llvm {

/// depends - Tests for a dependence between the Src and Dst instructions.
/// Returns NULL if no dependence; otherwise, returns a Dependence (or a
/// FullDependence) with as much information as can be gleaned.
std::unique_ptr<Dependence> depends(Instruction *Src,
Instruction *Dst);
/// FullDependence) with as much information as can be gleaned. When
/// UnderRuntimeAssumptions is true, the dependence test records the runtime
/// assumptions that it could not resolve at compilation time. By default
/// UnderRuntimeAssumptions is false, which gives a safe approximation of the
/// dependence relation that does not require runtime checks.
std::unique_ptr<Dependence> depends(Instruction *Src, Instruction *Dst,
bool UnderRuntimeAssumptions = false);

/// getSplitIteration - Give a dependence that's splittable at some
/// particular level, return the iteration that should be used to split
Expand Down Expand Up @@ -350,11 +360,16 @@ namespace llvm {

/// getFunction - Returns the Function pointer stored in F.
Function *getFunction() const {
  return F;
}

/// getRuntimeAssumptions - Returns all the runtime assumptions under which
/// the dependence test is valid.
SCEVUnionPredicate getRuntimeAssumptions() const;

private:
AAResults *AA;
ScalarEvolution *SE;
LoopInfo *LI;
Function *F;
SmallVector<const SCEVPredicate *, 4> Assumptions;

/// Subscript - This private struct represents a pair of subscripts from
/// a pair of potentially multi-dimensional array references. We use a
Expand Down
7 changes: 7 additions & 0 deletions llvm/include/llvm/Analysis/ScalarEvolution.h
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,13 @@ class ScalarEvolution {
bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false,
bool OrNegative = false);

/// Check that \p S is a multiple of \p M. When \p S is an AddRecExpr, \p S is
/// a multiple of \p M if \p S starts with a multiple of \p M and at every
/// iteration step \p S only adds multiples of \p M. \p Assumptions records
/// the runtime predicates under which \p S is a multiple of \p M.
bool isKnownMultipleOf(const SCEV *S, uint64_t M,
SmallVectorImpl<const SCEVPredicate *> &Assumptions);

/// Splits SCEV expression \p S into two SCEVs. One of them is obtained from
/// \p S by substitution of all AddRec sub-expression related to loop \p L
/// with initial value of that SCEV. The second is obtained from \p S by
Expand Down
113 changes: 87 additions & 26 deletions llvm/lib/Analysis/DependenceAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
if (DstI->mayReadOrWriteMemory()) {
OS << "Src:" << *SrcI << " --> Dst:" << *DstI << "\n";
OS << " da analyze - ";
if (auto D = DA->depends(&*SrcI, &*DstI)) {
if (auto D = DA->depends(&*SrcI, &*DstI,
/*UnderRuntimeAssumptions=*/true)) {
// Normalize negative direction vectors if required by clients.
if (NormalizeResults && D->normalize(&SE))
OS << "normalized - ";
Expand All @@ -199,13 +200,17 @@ static void dumpExampleDependence(raw_ostream &OS, DependenceInfo *DA,
OS << "!\n";
}
}
}
else
} else
OS << "none!\n";
}
}
}
}
SCEVUnionPredicate Assumptions = DA->getRuntimeAssumptions();
if (!Assumptions.isAlwaysTrue()) {
OS << "Runtime Assumptions:\n";
Assumptions.print(OS, 0);
}
}

void DependenceAnalysisWrapperPass::print(raw_ostream &OS,
Expand Down Expand Up @@ -264,9 +269,10 @@ bool Dependence::isScalar(unsigned level) const {
// FullDependence methods

FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
const SCEVUnionPredicate &Assumes,
bool PossiblyLoopIndependent,
unsigned CommonLevels)
: Dependence(Source, Destination), Levels(CommonLevels),
: Dependence(Source, Destination, Assumes), Levels(CommonLevels),
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
if (CommonLevels)
Expand Down Expand Up @@ -706,6 +712,12 @@ void Dependence::dump(raw_ostream &OS) const {
OS << " splitable";
}
OS << "!\n";

SCEVUnionPredicate Assumptions = getRuntimeAssumptions();
if (!Assumptions.isAlwaysTrue()) {
OS << " Runtime Assumptions:\n";
Assumptions.print(OS, 2);
}
}

// Returns NoAlias/MayAlias/MustAlias for two memory locations based upon their
Expand Down Expand Up @@ -3569,6 +3581,10 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
Inv.invalidate<LoopAnalysis>(F, PA);
}

// getRuntimeAssumptions -
// Wraps the predicates accumulated in Assumptions into a single
// SCEVUnionPredicate built against this analysis' ScalarEvolution.
SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
  SCEVUnionPredicate AllAssumptions(Assumptions, *SE);
  return AllAssumptions;
}

// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
Expand All @@ -3581,7 +3597,9 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
// Care is required to keep the routine below, getSplitIteration(),
// up to date with respect to this routine.
std::unique_ptr<Dependence>
DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
DependenceInfo::depends(Instruction *Src, Instruction *Dst,
bool UnderRuntimeAssumptions) {
SmallVector<const SCEVPredicate *, 4> Assume;
bool PossiblyLoopIndependent = true;
if (Src == Dst)
PossiblyLoopIndependent = false;
Expand All @@ -3593,22 +3611,20 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
return std::make_unique<Dependence>(Src, Dst);
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
}

assert(isLoadOrStore(Src) && "instruction is not load or store");
assert(isLoadOrStore(Dst) && "instruction is not load or store");
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
const MemoryLocation &SrcLoc = MemoryLocation::get(Src);

switch (underlyingObjectsAlias(AA, F->getDataLayout(),
MemoryLocation::get(Dst),
MemoryLocation::get(Src))) {
switch (underlyingObjectsAlias(AA, F->getDataLayout(), DstLoc, SrcLoc)) {
case AliasResult::MayAlias:
case AliasResult::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
return std::make_unique<Dependence>(Src, Dst);
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
case AliasResult::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
LLVM_DEBUG(dbgs() << "no alias\n");
Expand All @@ -3617,30 +3633,75 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
break; // The underlying objects alias; test accesses for dependence.
}

// establish loop nesting levels
establishNestingLevels(Src, Dst);
LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");

FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
if (DstLoc.Size != SrcLoc.Size || !DstLoc.Size.isPrecise() ||
!SrcLoc.Size.isPrecise()) {
// The dependence test gets confused if the sizes of the memory accesses
// differ or are not precise.
LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
}

unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
Value *SrcPtr = getLoadStorePointerOperand(Src);
Value *DstPtr = getLoadStorePointerOperand(Dst);
const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
const SCEV *DstSCEV = SE->getSCEV(DstPtr);
LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n");
LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n");
if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) {
const SCEV *SrcBase = SE->getPointerBase(SrcSCEV);
const SCEV *DstBase = SE->getPointerBase(DstSCEV);
if (SrcBase != DstBase) {
// If two pointers have different bases, trying to analyze indexes won't
// work; we can't compare them to each other. This can happen, for example,
// if one is produced by an LCSSA PHI node.
//
// We check this upfront so we don't crash in cases where getMinusSCEV()
// returns a SCEVCouldNotCompute.
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
return std::make_unique<Dependence>(Src, Dst);
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
}

uint64_t EltSize = SrcLoc.Size.toRaw();
const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase);
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);

if (Src != Dst) {
// Check that memory access offsets are multiples of element sizes.
if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) ||
!SE->isKnownMultipleOf(DstEv, EltSize, Assume)) {
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
}
}

if (!Assume.empty()) {
if (!UnderRuntimeAssumptions)
return std::make_unique<Dependence>(Src, Dst,
SCEVUnionPredicate(Assume, *SE));
// Add non-redundant assumptions.
unsigned N = Assumptions.size();
for (const SCEVPredicate *P : Assume) {
bool Implied = false;
for (unsigned I = 0; I != N && !Implied; I++)
if (Assumptions[I]->implies(P, *SE))
Implied = true;
if (!Implied)
Assumptions.push_back(P);
}
}

establishNestingLevels(Src, Dst);
LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");

FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;

unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
Pair[0].Src = SrcSCEV;
Pair[0].Dst = DstSCEV;

Expand Down Expand Up @@ -4034,7 +4095,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
// establish loop nesting levels
establishNestingLevels(Src, Dst);

FullDependence Result(Src, Dst, false, CommonLevels);
FullDependence Result(Src, Dst, Dep.Assumptions, false, CommonLevels);

unsigned Pairs = 1;
SmallVector<Subscript, 2> Pair(Pairs);
Expand Down
50 changes: 50 additions & 0 deletions llvm/lib/Analysis/ScalarEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10971,6 +10971,56 @@ bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero,
return all_of(Mul->operands(), NonRecursive) && (OrZero || isKnownNonZero(S));
}

bool ScalarEvolution::isKnownMultipleOf(
const SCEV *S, uint64_t M,
SmallVectorImpl<const SCEVPredicate *> &Assumptions) {
if (M == 0)
return false;
Comment on lines +10977 to +10978
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: What if S is also 0? This is a very special case, so I don't think we need to handle such a case carefully for now, but I would prefer to leave some TODO/FIXME comments for such a case. (For example, I think it's also reasonable to append a predicate for S == 0 to Assumptions and return true here.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

S==0 in the context of getMinusSCEV would mean an eltSize of 0, which means memory will never be accessed which means there never is a dependency. But there isn't necessarily this context for the caller of isKnownMultipleOf.

This function can always return false, since it only returns true if it is a known multiple. The case 0/0 is still questionable, so it is safe to return false here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But there isn't necessarily this context for the caller of isKnownMultipleOf

Yes, this is exactly what I'm concerned about.

This function can always return false, since it only returns true if it is a known multiple. The case 0/0 is still questionable, so it is safe to return false here.

I see, that makes sense to me.

if (M == 1)
return true;

// Recursively check AddRec operands. An AddRecExpr S is a multiple of M if S
// starts with a multiple of M and at every iteration step S only adds
// multiples of M.
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
return isKnownMultipleOf(AddRec->getStart(), M, Assumptions) &&
isKnownMultipleOf(AddRec->getStepRecurrence(*this), M, Assumptions);

// For a constant, check that "S % M == 0".
if (auto *Cst = dyn_cast<SCEVConstant>(S)) {
APInt C = Cst->getAPInt();
return C.urem(M) == 0;
}

// TODO: Also check other SCEV expressions, e.g., SCEVAddExpr, SCEVMulExpr.

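// Basic tests have failed.
// Check "S % M == 0" at compile time and record runtime Assumptions.
// NOTE(review): the dyn_cast below yields null when S is not integer-typed,
// and STy is then used unchecked; confirm that callers only pass
// integer-typed SCEVs.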
auto *STy = dyn_cast<IntegerType>(S->getType());
const SCEV *SmodM =
getURemExpr(S, getConstant(ConstantInt::get(STy, M, false)));
const SCEV *Zero = getZero(STy);

// Check whether "S % M == 0" is known at compile time.
if (isKnownPredicate(ICmpInst::ICMP_EQ, SmodM, Zero))
return true;

// Check whether "S % M != 0" is known at compile time.
if (isKnownPredicate(ICmpInst::ICMP_NE, SmodM, Zero))
return false;

const SCEVPredicate *P = getComparePredicate(ICmpInst::ICMP_EQ, SmodM, Zero);

// Detect redundant predicates.
for (auto *A : Assumptions)
if (A->implies(P, *this))
return true;

// Only record non-redundant predicates.
Assumptions.push_back(P);
return true;
}

std::pair<const SCEV *, const SCEV *>
ScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV *S) {
// Compute SCEV on entry of loop L.
Expand Down
22 changes: 22 additions & 0 deletions llvm/test/Analysis/DependenceAnalysis/DifferentAccessSize.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
; RUN: | FileCheck %s

; The dependence test does not handle array accesses of different sizes: i32 and i64.
; Bug 16183 - https://github.com/llvm/llvm-project/issues/16183

; The i32 store goes through a GEP one i32 element past %A, while the i64 load
; reads from %A itself: both accesses share the base pointer %A but have
; different access sizes. The CHECK lines below expect "confused" for the
; store/load pair and "none" for each access paired with itself.
define i64 @bug16183_alias(ptr nocapture %A) {
; CHECK-LABEL: 'bug16183_alias'
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: %0 = load i64, ptr %A, align 8
; CHECK-NEXT: da analyze - confused!
; CHECK-NEXT: Src: %0 = load i64, ptr %A, align 8 --> Dst: %0 = load i64, ptr %A, align 8
; CHECK-NEXT: da analyze - none!
;
entry:
%arrayidx = getelementptr inbounds i32, ptr %A, i64 1
store i32 2, ptr %arrayidx, align 4
%0 = load i64, ptr %A, align 8
ret i64 %0
}
Loading