31
31
#include " llvm/Analysis/ScalarEvolution.h"
32
32
#include " llvm/Analysis/ScalarEvolutionExpressions.h"
33
33
#include " llvm/Analysis/TargetLibraryInfo.h"
34
+ #include " llvm/Analysis/TargetTransformInfo.h"
34
35
#include " llvm/Analysis/ValueTracking.h"
35
36
#include " llvm/Analysis/VectorUtils.h"
36
37
#include " llvm/IR/BasicBlock.h"
@@ -2122,32 +2123,34 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2122
2123
return Dependence::Forward;
2123
2124
}
2124
2125
2125
- if (!C) {
2126
- // TODO: FoundNonConstantDistanceDependence is used as a necessary condition
2127
- // to consider retrying with runtime checks. Historically, we did not set it
2128
- // when strides were different but there is no inherent reason to.
2126
+ int64_t MinDistance = SE.getSignedRangeMin (Dist).getSExtValue ();
2127
+ // Below we only handle strictly positive distances.
2128
+ if (MinDistance <= 0 ) {
2129
2129
FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2130
- LLVM_DEBUG (dbgs () << " LAA: Dependence because of non-constant distance\n " );
2131
2130
return Dependence::Unknown;
2132
2131
}
2133
2132
2134
- if (!SE.isKnownPositive (Dist))
2135
- return Dependence::Unknown;
2133
+ if (!isa<SCEVConstant>(Dist)) {
2134
+ // Previously this case would be treated as Unknown, possibly setting
2135
+ // FoundNonConstantDistanceDependence to force re-trying with runtime
2136
+ // checks. Until the TODO below is addressed, set it here to preserve
2137
+ // original behavior w.r.t. re-trying with runtime checks.
2138
+ // TODO: FoundNonConstantDistanceDependence is used as a necessary
2139
+ // condition to consider retrying with runtime checks. Historically, we
2140
+ // did not set it when strides were different but there is no inherent
2141
+ // reason to.
2142
+ FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2143
+ }
2136
2144
2137
2145
if (!HasSameSize) {
2138
2146
LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
2139
2147
" different type sizes\n " );
2140
2148
return Dependence::Unknown;
2141
2149
}
2142
2150
2143
- // The logic below currently only supports StrideA == StrideB, i.e. there's a
2144
- // common stride.
2145
2151
if (!CommonStride)
2146
2152
return Dependence::Unknown;
2147
2153
2148
- const APInt &Val = C->getAPInt ();
2149
- int64_t Distance = Val.getSExtValue ();
2150
-
2151
2154
// Bail out early if passed-in parameters make vectorization not feasible.
2152
2155
unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2153
2156
VectorizerParams::VectorizationFactor : 1 );
@@ -2172,8 +2175,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2172
2175
// | A[0] | | A[2] | | A[4] | | A[6] | |
2173
2176
// | B[0] | | B[2] | | B[4] |
2174
2177
//
2175
- // Distance needs for vectorizing iterations except the last iteration:
2176
- // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
2178
+ // Distance needed for vectorizing iterations except the last iteration:
2179
+ // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
2177
2180
// So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2178
2181
//
2179
2182
// If MinNumIter is 2, it is vectorizable as the minimum distance needed is
@@ -2182,11 +2185,22 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2182
2185
// If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2183
2186
// the minimum distance needed is 28, which is greater than distance. It is
2184
2187
// not safe to do vectorization.
2188
+
2189
+ // We know that Dist is positive, but it may not be constant. Use the signed
2190
+ // minimum for computations below, as this ensures we compute the closest
2191
+ // possible dependence distance.
2185
2192
uint64_t MinDistanceNeeded =
2186
- TypeByteSize * (*CommonStride) * (MinNumIter - 1 ) + TypeByteSize;
2187
- if (MinDistanceNeeded > static_cast <uint64_t >(Distance)) {
2188
- LLVM_DEBUG (dbgs () << " LAA: Failure because of positive distance "
2189
- << Distance << ' \n ' );
2193
+ TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2194
+ if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
2195
+ if (!isa<SCEVConstant>(Dist)) {
2196
+ // For non-constant distances, we checked the lower bound of the
2197
+ // dependence distance and the distance may be larger at runtime (and safe
2198
+ // for vectorization). Classify it as Unknown, so we re-try with runtime
2199
+ // checks.
2200
+ return Dependence::Unknown;
2201
+ }
2202
+ LLVM_DEBUG (dbgs () << " LAA: Failure because of positive minimum distance "
2203
+ << MinDistance << ' \n ' );
2190
2204
return Dependence::Backward;
2191
2205
}
2192
2206
@@ -2215,12 +2229,13 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2215
2229
// is 8, which is less than 2 and forbidden vectorization, but actually
2216
2230
// both A and B could be vectorized by 2 iterations.
2217
2231
MinDepDistBytes =
2218
- std::min (static_cast <uint64_t >(Distance ), MinDepDistBytes);
2232
+ std::min (static_cast <uint64_t >(MinDistance ), MinDepDistBytes);
2219
2233
2220
2234
bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2221
2235
uint64_t MinDepDistBytesOld = MinDepDistBytes;
2222
2236
if (IsTrueDataDependence && EnableForwardingConflictDetection &&
2223
- couldPreventStoreLoadForward (Distance, TypeByteSize)) {
2237
+ isa<SCEVConstant>(Dist) &&
2238
+ couldPreventStoreLoadForward (MinDistance, TypeByteSize)) {
2224
2239
// Sanity check that we didn't update MinDepDistBytes when calling
2225
2240
// couldPreventStoreLoadForward
2226
2241
assert (MinDepDistBytes == MinDepDistBytesOld &&
@@ -2232,10 +2247,18 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
2232
2247
2233
2248
// An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2234
2249
// since there is a backwards dependency.
2235
- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * ( *CommonStride) );
2236
- LLVM_DEBUG (dbgs () << " LAA: Positive distance " << Val. getSExtValue ()
2250
+ uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride);
2251
+ LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
2237
2252
<< " with max VF = " << MaxVF << ' \n ' );
2253
+
2238
2254
uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8 ;
2255
+ if (!isa<SCEVConstant>(Dist) && MaxVFInBits < MaxTargetVectorWidthInBits) {
2256
+ // For non-constant distances, we checked the lower bound of the dependence
2257
+ // distance and the distance may be larger at runtime (and safe for
2258
+ // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2259
+ return Dependence::Unknown;
2260
+ }
2261
+
2239
2262
MaxSafeVectorWidthInBits = std::min (MaxSafeVectorWidthInBits, MaxVFInBits);
2240
2263
return Dependence::BackwardVectorizable;
2241
2264
}
@@ -3018,11 +3041,28 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
3018
3041
}
3019
3042
3020
3043
LoopAccessInfo::LoopAccessInfo (Loop *L, ScalarEvolution *SE,
3044
+ const TargetTransformInfo *TTI,
3021
3045
const TargetLibraryInfo *TLI, AAResults *AA,
3022
3046
DominatorTree *DT, LoopInfo *LI)
3023
3047
: PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3024
- PtrRtChecking (nullptr ),
3025
- DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
3048
+ PtrRtChecking (nullptr ), TheLoop(L) {
3049
+ unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
3050
+ if (TTI) {
3051
+ TypeSize FixedWidth =
3052
+ TTI->getRegisterBitWidth (TargetTransformInfo::RGK_FixedWidthVector);
3053
+ if (FixedWidth.isNonZero ()) {
3054
+ // Scale the vector width by 2 as rough estimate to also consider
3055
+ // interleaving.
3056
+ MaxTargetVectorWidthInBits = FixedWidth.getFixedValue () * 2 ;
3057
+ }
3058
+
3059
+ TypeSize ScalableWidth =
3060
+ TTI->getRegisterBitWidth (TargetTransformInfo::RGK_ScalableVector);
3061
+ if (ScalableWidth.isNonZero ())
3062
+ MaxTargetVectorWidthInBits = std::numeric_limits<unsigned >::max ();
3063
+ }
3064
+ DepChecker =
3065
+ std::make_unique<MemoryDepChecker>(*PSE, L, MaxTargetVectorWidthInBits);
3026
3066
PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3027
3067
if (canAnalyzeLoop ()) {
3028
3068
analyzeLoop (AA, LI, TLI, DT);
@@ -3082,7 +3122,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3082
3122
3083
3123
if (I.second )
3084
3124
I.first ->second =
3085
- std::make_unique<LoopAccessInfo>(&L, &SE, TLI, &AA, &DT, &LI);
3125
+ std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
3086
3126
3087
3127
return *I.first ->second ;
3088
3128
}
@@ -3111,8 +3151,9 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
3111
3151
auto &AA = FAM.getResult <AAManager>(F);
3112
3152
auto &DT = FAM.getResult <DominatorTreeAnalysis>(F);
3113
3153
auto &LI = FAM.getResult <LoopAnalysis>(F);
3154
+ auto &TTI = FAM.getResult <TargetIRAnalysis>(F);
3114
3155
auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
3115
- return LoopAccessInfoManager (SE, AA, DT, LI, &TLI);
3156
+ return LoopAccessInfoManager (SE, AA, DT, LI, &TTI, & TLI);
3116
3157
}
3117
3158
3118
3159
AnalysisKey LoopAccessAnalysis::Key;
0 commit comments