@@ -1786,22 +1786,21 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1786
1786
Status = S;
1787
1787
}
1788
1788
1789
- // / Given a dependence-distance \p Dist between two
1790
- // / memory accesses, that have strides in the same direction whose absolute
1791
- // / value of the maximum stride is given in \p MaxStride, and that have the same
1792
- // / type size \p TypeByteSize, in a loop whose maximum backedge taken count is
1793
- // / \p MaxBTC, check if it is possible to prove statically that the dependence
1789
+ // / Given a dependence-distance \p Dist between two memory accesses, that have
1790
+ // / strides in the same direction whose absolute value of the maximum stride is
1791
+ // / given in \p MaxStride, in a loop whose maximum backedge taken count is \p
1792
+ // / MaxBTC, check if it is possible to prove statically that the dependence
1794
1793
// / distance is larger than the range that the accesses will travel through the
1795
1794
// / execution of the loop. If so, return true; false otherwise. This is useful
1796
1795
// / for example in loops such as the following (PR31098):
1796
+ // /
1797
1797
// / for (i = 0; i < D; ++i) {
1798
1798
// / = out[i];
1799
1799
// / out[i+D] =
1800
1800
// / }
1801
1801
static bool isSafeDependenceDistance (const DataLayout &DL, ScalarEvolution &SE,
1802
1802
const SCEV &MaxBTC, const SCEV &Dist,
1803
- uint64_t MaxStride,
1804
- uint64_t TypeByteSize) {
1803
+ uint64_t MaxStride) {
1805
1804
1806
1805
// If we can prove that
1807
1806
// (**) |Dist| > MaxBTC * Step
@@ -1820,8 +1819,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1820
1819
// will be executed only if LoopCount >= VF, proving distance >= LoopCount
1821
1820
// also guarantees that distance >= VF.
1822
1821
//
1823
- const uint64_t ByteStride = MaxStride * TypeByteSize;
1824
- const SCEV *Step = SE.getConstant (MaxBTC.getType (), ByteStride);
1822
+ const SCEV *Step = SE.getConstant (MaxBTC.getType (), MaxStride);
1825
1823
const SCEV *Product = SE.getMulExpr (&MaxBTC, Step);
1826
1824
1827
1825
const SCEV *CastedDist = &Dist;
@@ -1851,8 +1849,8 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1851
1849
}
1852
1850
1853
1851
// / Check the dependence for two accesses with the same stride \p Stride.
1854
- // / \p Distance is the positive distance and \p TypeByteSize is type size in
1855
- // / bytes.
1852
+ // / \p Distance is the positive distance in bytes, and \p TypeByteSize is type
1853
+ // / size in bytes.
1856
1854
// /
1857
1855
// / \returns true if they are independent.
1858
1856
static bool areStridedAccessesIndependent (uint64_t Distance, uint64_t Stride,
@@ -1865,25 +1863,23 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1865
1863
if (Distance % TypeByteSize)
1866
1864
return false ;
1867
1865
1868
- uint64_t ScaledDist = Distance / TypeByteSize;
1869
-
1870
- // No dependence if the scaled distance is not multiple of the stride.
1866
+ // No dependence if the distance is not a multiple of the stride.
1871
1867
// E.g.
1872
1868
// for (i = 0; i < 1024 ; i += 4)
1873
1869
// A[i+2] = A[i] + 1;
1874
1870
//
1875
- // Two accesses in memory (scaled distance is 2, stride is 4):
1871
+ // Two accesses in memory (distance is 2, stride is 4):
1876
1872
// | A[0] | | | | A[4] | | | |
1877
1873
// | | | A[2] | | | | A[6] | |
1878
1874
//
1879
1875
// E.g.
1880
1876
// for (i = 0; i < 1024 ; i += 3)
1881
1877
// A[i+4] = A[i] + 1;
1882
1878
//
1883
- // Two accesses in memory (scaled distance is 4, stride is 3):
1879
+ // Two accesses in memory (distance is 4, stride is 3):
1884
1880
// | A[0] | | | A[3] | | | A[6] | | |
1885
1881
// | | | | | A[4] | | | A[7] | |
1886
- return ScaledDist % Stride;
1882
+ return Distance % Stride;
1887
1883
}
1888
1884
1889
1885
std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1992,25 +1988,28 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
1992
1988
return MemoryDepChecker::Dependence::Unknown;
1993
1989
}
1994
1990
1995
- uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1996
- bool HasSameSize =
1997
- DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1998
- if (!HasSameSize)
1999
- TypeByteSize = 0 ;
1991
+ TypeSize AStoreSz = DL.getTypeStoreSize (ATy);
1992
+ TypeSize BStoreSz = DL.getTypeStoreSize (BTy);
1993
+
1994
+ // If store sizes are not the same, set TypeByteSize to zero, so we can check
1995
+ // it in the caller isDependent.
1996
+ uint64_t ASz = DL.getTypeAllocSize (ATy);
1997
+ uint64_t BSz = DL.getTypeAllocSize (BTy);
1998
+ uint64_t TypeByteSize = (AStoreSz == BStoreSz) ? BSz : 0 ;
2000
1999
2001
- StrideAPtrInt = std::abs (StrideAPtrInt);
2002
- StrideBPtrInt = std::abs (StrideBPtrInt);
2000
+ uint64_t StrideAScaled = std::abs (StrideAPtrInt) * ASz ;
2001
+ uint64_t StrideBScaled = std::abs (StrideBPtrInt) * BSz ;
2003
2002
2004
- uint64_t MaxStride = std::max (StrideAPtrInt, StrideBPtrInt );
2003
+ uint64_t MaxStride = std::max (StrideAScaled, StrideBScaled );
2005
2004
2006
2005
std::optional<uint64_t > CommonStride;
2007
- if (StrideAPtrInt == StrideBPtrInt )
2008
- CommonStride = StrideAPtrInt ;
2006
+ if (StrideAScaled == StrideBScaled )
2007
+ CommonStride = StrideAScaled ;
2009
2008
2010
2009
// TODO: Historically, we don't retry with runtime checks unless the
2011
2010
// (unscaled) strides are the same. Fix this once the condition for runtime
2012
2011
// checks in isDependent is fixed.
2013
- bool ShouldRetryWithRuntimeCheck = CommonStride. has_value () ;
2012
+ bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt ;
2014
2013
2015
2014
return DepDistanceStrideAndSizeInfo (Dist, MaxStride, CommonStride,
2016
2015
ShouldRetryWithRuntimeCheck, TypeByteSize,
@@ -2050,9 +2049,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2050
2049
// upper bound of the number of iterations), the accesses are independent, i.e.
2051
2050
// they are far enough apart that accesses won't access the same location
2052
2051
// across all loop iterations.
2053
- if (HasSameSize && isSafeDependenceDistance (
2054
- DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()),
2055
- *Dist, MaxStride, TypeByteSize ))
2052
+ if (HasSameSize &&
2053
+ isSafeDependenceDistance (
2054
+ DL, SE, *(PSE. getSymbolicMaxBackedgeTakenCount ()), *Dist, MaxStride))
2056
2055
return Dependence::NoDep;
2057
2056
2058
2057
const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
@@ -2156,8 +2155,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2156
2155
2157
2156
// It's not vectorizable if the distance is smaller than the minimum distance
2158
2157
// needed for a vectorized/unrolled version. Vectorizing one iteration in
2159
- // front needs TypeByteSize * Stride . Vectorizing the last iteration needs
2160
- // TypeByteSize (No need to plus the last gap distance).
2158
+ // front needs CommonStride . Vectorizing the last iteration needs TypeByteSize
2159
+ // (No need to add the last gap distance).
2161
2160
//
2162
2161
// E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2163
2162
// foo(int *A) {
@@ -2166,7 +2165,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2166
2165
// B[i] = A[i] + 1;
2167
2166
// }
2168
2167
//
2169
- // Two accesses in memory (stride is 2):
2168
+ // Two accesses in memory (stride is 4 * 2):
2170
2169
// | A[0] | | A[2] | | A[4] | | A[6] | |
2171
2170
// | B[0] | | B[2] | | B[4] |
2172
2171
//
@@ -2184,8 +2183,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2184
2183
// We know that Dist is positive, but it may not be constant. Use the signed
2185
2184
// minimum for computations below, as this ensures we compute the closest
2186
2185
// possible dependence distance.
2187
- uint64_t MinDistanceNeeded =
2188
- TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2186
+ uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2189
2187
if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
2190
2188
if (!ConstDist) {
2191
2189
// For non-constant distances, we checked the lower bound of the
@@ -2241,7 +2239,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2241
2239
2242
2240
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2243
2241
// since there is a backwards dependency.
2244
- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * * CommonStride) ;
2242
+ uint64_t MaxVF = MinDepDistBytes / * CommonStride;
2245
2243
LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
2246
2244
<< " with max VF = " << MaxVF << ' \n ' );
2247
2245
0 commit comments