|
61 | 61 | #include <cstdint>
|
62 | 62 | #include <iterator>
|
63 | 63 | #include <utility>
|
| 64 | +#include <variant> |
64 | 65 | #include <vector>
|
65 | 66 |
|
66 | 67 | using namespace llvm;
|
@@ -1876,76 +1877,109 @@ isLoopVariantIndirectAddress(ArrayRef<const Value *> UnderlyingObjects,
|
1876 | 1877 | });
|
1877 | 1878 | }
|
1878 | 1879 |
|
1879 |
| -MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( |
1880 |
| - const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, |
1881 |
| - unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides, |
1882 |
| - const DenseMap<Value *, SmallVector<const Value *, 16>> |
1883 |
| - &UnderlyingObjects) { |
1884 |
| - assert (AIdx < BIdx && "Must pass arguments in program order"); |
1885 |
| - |
| 1880 | +// Get the dependence distance, stride, type size and whether each access is a |
| 1881 | +// write for the dependence between A and B. Returns a DepType if we can prove |
| 1882 | +// there's no dependence or the analysis fails. Outlined to a helper to limit |
| 1883 | +// the scope of various temporary variables, like A/BPtr, StrideA/BPtr and |
| 1884 | +// others. Returns either the dependence result, if it could already be |
| 1885 | +// determined, or a tuple with (Distance, Stride, TypeSize, AIsWrite, BIsWrite). |
| 1886 | +static std::variant<MemoryDepChecker::Dependence::DepType, |
| 1887 | + std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>> |
| 1888 | +getDependenceDistanceStrideAndSize( |
| 1889 | + const AccessAnalysis::MemAccessInfo &A, Instruction *AInst, |
| 1890 | + const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, |
| 1891 | + const DenseMap<Value *, const SCEV *> &Strides, |
| 1892 | + const DenseMap<Value *, SmallVector<const Value *, 16>> &UnderlyingObjects, |
| 1893 | + PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) { |
| 1894 | + auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); |
| 1895 | + auto &SE = *PSE.getSE(); |
1886 | 1896 | auto [APtr, AIsWrite] = A;
|
1887 | 1897 | auto [BPtr, BIsWrite] = B;
|
1888 |
| - Type *ATy = getLoadStoreType(InstMap[AIdx]); |
1889 |
| - Type *BTy = getLoadStoreType(InstMap[BIdx]); |
1890 | 1898 |
|
1891 | 1899 | // Two reads are independent.
|
1892 | 1900 | if (!AIsWrite && !BIsWrite)
|
1893 |
| - return Dependence::NoDep; |
| 1901 | + return MemoryDepChecker::Dependence::NoDep; |
| 1902 | + |
| 1903 | + Type *ATy = getLoadStoreType(AInst); |
| 1904 | + Type *BTy = getLoadStoreType(BInst); |
1894 | 1905 |
|
1895 | 1906 | // We cannot check pointers in different address spaces.
|
1896 | 1907 | if (APtr->getType()->getPointerAddressSpace() !=
|
1897 | 1908 | BPtr->getType()->getPointerAddressSpace())
|
1898 |
| - return Dependence::Unknown; |
| 1909 | + return MemoryDepChecker::Dependence::Unknown; |
1899 | 1910 |
|
1900 | 1911 | int64_t StrideAPtr =
|
1901 |
| - getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0); |
| 1912 | + getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true).value_or(0); |
1902 | 1913 | int64_t StrideBPtr =
|
1903 |
| - getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0); |
| 1914 | + getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true).value_or(0); |
1904 | 1915 |
|
1905 | 1916 | const SCEV *Src = PSE.getSCEV(APtr);
|
1906 | 1917 | const SCEV *Sink = PSE.getSCEV(BPtr);
|
1907 | 1918 |
|
1908 |
| - // If the induction step is negative we have to invert source and sink of the |
1909 |
| - // dependence. |
| 1919 | + // If the induction step is negative we have to invert source and sink of |
| 1920 | + // the dependence. |
1910 | 1921 | if (StrideAPtr < 0) {
|
1911 | 1922 | std::swap(APtr, BPtr);
|
1912 | 1923 | std::swap(ATy, BTy);
|
1913 | 1924 | std::swap(Src, Sink);
|
1914 | 1925 | std::swap(AIsWrite, BIsWrite);
|
1915 |
| - std::swap(AIdx, BIdx); |
| 1926 | + std::swap(AInst, BInst); |
1916 | 1927 | std::swap(StrideAPtr, StrideBPtr);
|
1917 | 1928 | }
|
1918 | 1929 |
|
1919 |
| - ScalarEvolution &SE = *PSE.getSE(); |
1920 | 1930 | const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
|
1921 | 1931 |
|
1922 | 1932 | LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
|
1923 | 1933 | << "(Induction step: " << StrideAPtr << ")\n");
|
1924 |
| - LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " |
1925 |
| - << *InstMap[BIdx] << ": " << *Dist << "\n"); |
| 1934 | + LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst |
| 1935 | + << ": " << *Dist << "\n"); |
1926 | 1936 |
|
1927 | 1937 | // Needs accesses where the addresses of the accessed underlying objects do
|
1928 | 1938 | // not change within the loop.
|
1929 | 1939 | if (isLoopVariantIndirectAddress(UnderlyingObjects.find(APtr)->second, SE,
|
1930 | 1940 | InnermostLoop) ||
|
1931 | 1941 | isLoopVariantIndirectAddress(UnderlyingObjects.find(BPtr)->second, SE,
|
1932 | 1942 | InnermostLoop))
|
1933 |
| - return Dependence::IndirectUnsafe; |
| 1943 | + return MemoryDepChecker::Dependence::IndirectUnsafe; |
1934 | 1944 |
|
1935 | 1945 | // Need accesses with constant stride. We don't want to vectorize
|
1936 |
| - // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in |
1937 |
| - // the address space. |
1938 |
| - if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ |
| 1946 | + // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap |
| 1947 | + // in the address space. |
| 1948 | + if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { |
1939 | 1949 | LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
|
1940 |
| - return Dependence::Unknown; |
| 1950 | + return MemoryDepChecker::Dependence::Unknown; |
1941 | 1951 | }
|
1942 | 1952 |
|
1943 |
| - auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); |
1944 | 1953 | uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
|
1945 | 1954 | bool HasSameSize =
|
1946 | 1955 | DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
|
| 1956 | + if (!HasSameSize) |
| 1957 | + TypeByteSize = 0; |
1947 | 1958 | uint64_t Stride = std::abs(StrideAPtr);
|
| 1959 | + return std::make_tuple(Dist, Stride, TypeByteSize, AIsWrite, BIsWrite); |
| 1960 | +} |
| 1961 | + |
| 1962 | +MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( |
| 1963 | + const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B, |
| 1964 | + unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides, |
| 1965 | + const DenseMap<Value *, SmallVector<const Value *, 16>> |
| 1966 | + &UnderlyingObjects) { |
| 1967 | + assert(AIdx < BIdx && "Must pass arguments in program order"); |
| 1968 | + |
| 1969 | + // Get the dependence distance, stride, type size and which accesses write |
| 1970 | + // for the dependence between A and B. |
| 1971 | + auto Res = getDependenceDistanceStrideAndSize( |
| 1972 | + A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE, |
| 1973 | + InnermostLoop); |
| 1974 | + if (std::holds_alternative<Dependence::DepType>(Res)) |
| 1975 | + return std::get<Dependence::DepType>(Res); |
1948 | 1976 |
|
| 1977 | + const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] = |
| 1978 | + std::get<std::tuple<const SCEV *, uint64_t, uint64_t, bool, bool>>(Res); |
| 1979 | + bool HasSameSize = TypeByteSize > 0; |
| 1980 | + |
| 1981 | + ScalarEvolution &SE = *PSE.getSE(); |
| 1982 | + auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); |
1949 | 1983 | if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
|
1950 | 1984 | isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist,
|
1951 | 1985 | Stride, TypeByteSize))
|
|
0 commit comments