Skip to content

Commit e27a21f

Browse files
authored
[VPlan] Add hasScalarTail, use instead of !CM.foldTailByMasking() (NFC). (#134674)
Now that VPlan is able to fold away redundant branches to the scalar preheader, we can directly check in VPlan if the scalar tail may execute. hasScalarTail returns true if the tail may execute. We know that the scalar tail won't execute if the scalar preheader doesn't have any predecessors, i.e. is not reachable. This removes some late uses of the legacy cost model. PR: #134674
1 parent b46f16c commit e27a21f

File tree

3 files changed

+31
-19
lines changed

3 files changed

+31
-19
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -535,13 +535,13 @@ class LoopVectorizationPlanner {
535535
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
536536
/// that of B.
537537
bool isMoreProfitable(const VectorizationFactor &A,
538-
const VectorizationFactor &B) const;
538+
const VectorizationFactor &B, bool HasTail) const;
539539

540540
/// Returns true if the per-lane cost of VectorizationFactor A is lower than
541541
/// that of B in the context of vectorizing a loop with known \p MaxTripCount.
542542
bool isMoreProfitable(const VectorizationFactor &A,
543543
const VectorizationFactor &B,
544-
const unsigned MaxTripCount) const;
544+
const unsigned MaxTripCount, bool HasTail) const;
545545

546546
/// Determines if we have the infrastructure to vectorize the loop and its
547547
/// epilogue, assuming the main loop is vectorized by \p VF.

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 22 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4256,9 +4256,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
42564256
return EstimatedVF;
42574257
}
42584258

4259-
bool LoopVectorizationPlanner::isMoreProfitable(
4260-
const VectorizationFactor &A, const VectorizationFactor &B,
4261-
const unsigned MaxTripCount) const {
4259+
bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
4260+
const VectorizationFactor &B,
4261+
const unsigned MaxTripCount,
4262+
bool HasTail) const {
42624263
InstructionCost CostA = A.Cost;
42634264
InstructionCost CostB = B.Cost;
42644265

@@ -4296,9 +4297,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
42964297
if (!MaxTripCount)
42974298
return CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA);
42984299

4299-
auto GetCostForTC = [MaxTripCount, this](unsigned VF,
4300-
InstructionCost VectorCost,
4301-
InstructionCost ScalarCost) {
4300+
auto GetCostForTC = [MaxTripCount, HasTail](unsigned VF,
4301+
InstructionCost VectorCost,
4302+
InstructionCost ScalarCost) {
43024303
// If the trip count is a known (possibly small) constant, the trip count
43034304
// will be rounded up to an integer number of iterations under
43044305
// FoldTailByMasking. The total cost in that case will be
@@ -4307,20 +4308,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43074308
// some extra overheads, but for the purpose of comparing the costs of
43084309
// different VFs we can use this to compare the total loop-body cost
43094310
// expected after vectorization.
4310-
if (CM.foldTailByMasking())
4311-
return VectorCost * divideCeil(MaxTripCount, VF);
4312-
return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
4311+
if (HasTail)
4312+
return VectorCost * (MaxTripCount / VF) +
4313+
ScalarCost * (MaxTripCount % VF);
4314+
return VectorCost * divideCeil(MaxTripCount, VF);
43134315
};
43144316

43154317
auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost);
43164318
auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost);
43174319
return CmpFn(RTCostA, RTCostB);
43184320
}
43194321

4320-
bool LoopVectorizationPlanner::isMoreProfitable(
4321-
const VectorizationFactor &A, const VectorizationFactor &B) const {
4322+
bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
4323+
const VectorizationFactor &B,
4324+
bool HasTail) const {
43224325
const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount();
4323-
return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount);
4326+
return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount,
4327+
HasTail);
43244328
}
43254329

43264330
void LoopVectorizationPlanner::emitInvalidCostRemarks(
@@ -4609,7 +4613,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46094613
continue;
46104614
}
46114615

4612-
if (isMoreProfitable(Candidate, ChosenFactor))
4616+
if (isMoreProfitable(Candidate, ChosenFactor, P->hasScalarTail()))
46134617
ChosenFactor = Candidate;
46144618
}
46154619
}
@@ -4623,7 +4627,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
46234627
}
46244628

46254629
LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
4626-
!isMoreProfitable(ChosenFactor, ScalarCost)) dbgs()
4630+
!isMoreProfitable(ChosenFactor, ScalarCost,
4631+
!CM.foldTailByMasking())) dbgs()
46274632
<< "LV: Vectorization seems to be not beneficial, "
46284633
<< "but was forced by a user.\n");
46294634
return ChosenFactor;
@@ -4789,7 +4794,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
47894794
}
47904795

47914796
if (Result.Width.isScalar() ||
4792-
isMoreProfitable(NextVF, Result, MaxTripCount))
4797+
isMoreProfitable(NextVF, Result, MaxTripCount, !CM.foldTailByMasking()))
47934798
Result = NextVF;
47944799
}
47954800

@@ -7768,11 +7773,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
77687773

77697774
InstructionCost Cost = cost(*P, VF);
77707775
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
7771-
if (isMoreProfitable(CurrentFactor, BestFactor))
7776+
if (isMoreProfitable(CurrentFactor, BestFactor, P->hasScalarTail()))
77727777
BestFactor = CurrentFactor;
77737778

77747779
// If profitable add it to ProfitableVF list.
7775-
if (isMoreProfitable(CurrentFactor, ScalarFactor))
7780+
if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail()))
77767781
ProfitableVFs.push_back(CurrentFactor);
77777782
}
77787783
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3790,6 +3790,13 @@ class VPlan {
37903790
bool hasEarlyExit() const {
37913791
return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1;
37923792
}
3793+
3794+
/// Returns true if the scalar tail may execute after the vector loop. Note
3795+
/// that this relies on unneeded branches to the scalar tail loop being
3796+
/// removed.
3797+
bool hasScalarTail() const {
3798+
return getScalarPreheader()->getNumPredecessors() != 0;
3799+
}
37933800
};
37943801

37953802
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

0 commit comments

Comments
 (0)