@@ -1937,27 +1937,6 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
1937
1937
LLVM_DEBUG (dbgs () << " LAA: Distance for " << *AInst << " to " << *BInst
1938
1938
<< " : " << *Dist << " \n " );
1939
1939
1940
- // Check if we can prove that Sink only accesses memory after Src's end or
1941
- // vice versa. At the moment this is limited to cases where either source or
1942
- // sink are loop invariant to avoid compile-time increases. This is not
1943
- // required for correctness.
1944
- if (SE.isLoopInvariant (Src, InnermostLoop) ||
1945
- SE.isLoopInvariant (Sink, InnermostLoop)) {
1946
- const auto &[SrcStart, SrcEnd] =
1947
- getStartAndEndForAccess (InnermostLoop, Src, ATy, PSE, PointerBounds);
1948
- const auto &[SinkStart, SinkEnd] =
1949
- getStartAndEndForAccess (InnermostLoop, Sink, BTy, PSE, PointerBounds);
1950
- if (!isa<SCEVCouldNotCompute>(SrcStart) &&
1951
- !isa<SCEVCouldNotCompute>(SrcEnd) &&
1952
- !isa<SCEVCouldNotCompute>(SinkStart) &&
1953
- !isa<SCEVCouldNotCompute>(SinkEnd)) {
1954
- if (SE.isKnownPredicate (CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1955
- return MemoryDepChecker::Dependence::NoDep;
1956
- if (SE.isKnownPredicate (CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1957
- return MemoryDepChecker::Dependence::NoDep;
1958
- }
1959
- }
1960
-
1961
1940
// Need accesses with constant strides and the same direction for further
1962
1941
// dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
1963
1942
// similar code or pointer arithmetic that could wrap in the address space.
@@ -2003,12 +1982,45 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2003
1982
const MemAccessInfo &B, unsigned BIdx) {
2004
1983
assert (AIdx < BIdx && " Must pass arguments in program order" );
2005
1984
1985
+ // Returns true if SCEV can prove that Sink (the SCEV of B's pointer) only accesses memory after the end of Src (the SCEV of A's pointer) or
1986
+ // vice versa, i.e. unsigned SrcEnd <= SinkStart or SinkEnd <= SrcStart; returns false if any start/end bound is SCEVCouldNotCompute. The helper is used to perform the checks only on the exit paths
1987
+ // where it helps to improve the analysis result.
1988
+ auto CheckCompletelyBeforeOrAfter = [&]() {
1989
+ auto *APtr = A.getPointer ();
1990
+ auto *BPtr = B.getPointer ();
1991
+
1992
+ Type *ATy = getLoadStoreType (InstMap[AIdx]);
1993
+ Type *BTy = getLoadStoreType (InstMap[BIdx]);
1994
+
1995
+ const SCEV *Src = PSE.getSCEV (APtr);
1996
+ const SCEV *Sink = PSE.getSCEV (BPtr);
1997
+
1998
+ const auto &[SrcStart, SrcEnd] =
1999
+ getStartAndEndForAccess (InnermostLoop, Src, ATy, PSE, PointerBounds);
2000
+ if (isa<SCEVCouldNotCompute>(SrcStart) || isa<SCEVCouldNotCompute>(SrcEnd))
2001
+ return false ;
2002
+
2003
+ const auto &[SinkStart, SinkEnd] =
2004
+ getStartAndEndForAccess (InnermostLoop, Sink, BTy, PSE, PointerBounds);
2005
+ if (isa<SCEVCouldNotCompute>(SinkStart) ||
2006
+ isa<SCEVCouldNotCompute>(SinkEnd))
2007
+ return false ;
2008
+
2009
+ auto &SE = *PSE.getSE ();
2010
+ return SE.isKnownPredicate (CmpInst::ICMP_ULE, SrcEnd, SinkStart) ||
2011
+ SE.isKnownPredicate (CmpInst::ICMP_ULE, SinkEnd, SrcStart);
2012
+ };
2013
+
2006
2014
// Get the dependence distance, stride, type size and what access writes for
2007
2015
// the dependence between A and B.
2008
2016
auto Res =
2009
2017
getDependenceDistanceStrideAndSize (A, InstMap[AIdx], B, InstMap[BIdx]);
2010
- if (std::holds_alternative<Dependence::DepType>(Res))
2018
+ if (std::holds_alternative<Dependence::DepType>(Res)) {
2019
+ if (std::get<Dependence::DepType>(Res) == Dependence::Unknown &&
2020
+ CheckCompletelyBeforeOrAfter ())
2021
+ return Dependence::NoDep;
2011
2022
return std::get<Dependence::DepType>(Res);
2023
+ }
2012
2024
2013
2025
auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
2014
2026
std::get<DepDistanceStrideAndSizeInfo>(Res);
@@ -2017,6 +2029,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2017
2029
std::optional<uint64_t > CommonStride =
2018
2030
StrideA == StrideB ? std::make_optional (StrideA) : std::nullopt;
2019
2031
if (isa<SCEVCouldNotCompute>(Dist)) {
2032
+ if (CheckCompletelyBeforeOrAfter ())
2033
+ return Dependence::NoDep;
2034
+
2020
2035
// TODO: Relax requirement that there is a common stride to retry with
2021
2036
// non-constant distance dependencies.
2022
2037
FoundNonConstantDistanceDependence |= CommonStride.has_value ();
@@ -2068,6 +2083,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2068
2083
// Write to the same location with the same size.
2069
2084
return Dependence::Forward;
2070
2085
}
2086
+ assert (!CheckCompletelyBeforeOrAfter () &&
2087
+ " unexpectedly proved no dependence" );
2071
2088
LLVM_DEBUG (dbgs () << " LAA: possibly zero dependence difference but "
2072
2089
" different type sizes\n " );
2073
2090
return Dependence::Unknown;
@@ -2089,6 +2106,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2089
2106
// did not set it when strides were different but there is no inherent
2090
2107
// reason to.
2091
2108
FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2109
+ if (CheckCompletelyBeforeOrAfter ())
2110
+ return Dependence::NoDep;
2092
2111
return Dependence::Unknown;
2093
2112
}
2094
2113
if (!HasSameSize ||
@@ -2108,6 +2127,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2108
2127
// Below we only handle strictly positive distances.
2109
2128
if (MinDistance <= 0 ) {
2110
2129
FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2130
+ if (CheckCompletelyBeforeOrAfter ())
2131
+ return Dependence::NoDep;
2132
+
2111
2133
return Dependence::Unknown;
2112
2134
}
2113
2135
@@ -2124,13 +2146,18 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2124
2146
}
2125
2147
2126
2148
if (!HasSameSize) {
2149
+ if (CheckCompletelyBeforeOrAfter ())
2150
+ return Dependence::NoDep;
2127
2151
LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
2128
2152
" different type sizes\n " );
2129
2153
return Dependence::Unknown;
2130
2154
}
2131
2155
2132
- if (!CommonStride)
2156
+ if (!CommonStride) {
2157
+ if (CheckCompletelyBeforeOrAfter ())
2158
+ return Dependence::NoDep;
2133
2159
return Dependence::Unknown;
2160
+ }
2134
2161
2135
2162
// Bail out early if passed-in parameters make vectorization not feasible.
2136
2163
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
@@ -2178,6 +2205,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2178
2205
// dependence distance and the distance may be larger at runtime (and safe
2179
2206
// for vectorization). Classify it as Unknown, so we re-try with runtime
2180
2207
// checks.
2208
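+ // Before giving up, check whether one access lies completely before the other; if so there is no dependence.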
2209
+ if (CheckCompletelyBeforeOrAfter ())
2210
+ return Dependence::NoDep;
2211
+
2181
2212
return Dependence::Unknown;
2182
2213
}
2183
2214
LLVM_DEBUG (dbgs () << " LAA: Failure because of positive minimum distance "
@@ -2190,6 +2221,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2190
2221
if (MinDistanceNeeded > MinDepDistBytes) {
2191
2222
LLVM_DEBUG (dbgs () << " LAA: Failure because it needs at least "
2192
2223
<< MinDistanceNeeded << " size in bytes\n " );
2224
+ assert (!CheckCompletelyBeforeOrAfter () &&
2225
+ " unexpectedly proved no dependence" );
2193
2226
return Dependence::Backward;
2194
2227
}
2195
2228
@@ -2237,6 +2270,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2237
2270
// For non-constant distances, we checked the lower bound of the dependence
2238
2271
// distance and the distance may be larger at runtime (and safe for
2239
2272
// vectorization). Classify it as Unknown, so we re-try with runtime checks.
2273
+ assert (!CheckCompletelyBeforeOrAfter () &&
2274
+ " unexpectedly proved no dependence" );
2240
2275
return Dependence::Unknown;
2241
2276
}
2242
2277
0 commit comments