@@ -4253,9 +4253,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
4253
4253
return EstimatedVF;
4254
4254
}
4255
4255
4256
- bool LoopVectorizationPlanner::isMoreProfitable(
4257
- const VectorizationFactor &A, const VectorizationFactor &B,
4258
- const unsigned MaxTripCount) const {
4256
+ bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
4257
+ const VectorizationFactor &B,
4258
+ const unsigned MaxTripCount,
4259
+ bool HasTail) const {
4259
4260
InstructionCost CostA = A.Cost;
4260
4261
InstructionCost CostB = B.Cost;
4261
4262
@@ -4293,9 +4294,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4293
4294
if (!MaxTripCount)
4294
4295
return CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA);
4295
4296
4296
- auto GetCostForTC = [MaxTripCount, this ](unsigned VF,
4297
- InstructionCost VectorCost,
4298
- InstructionCost ScalarCost) {
4297
+ auto GetCostForTC = [MaxTripCount, HasTail ](unsigned VF,
4298
+ InstructionCost VectorCost,
4299
+ InstructionCost ScalarCost) {
4299
4300
// If the trip count is a known (possibly small) constant, the trip count
4300
4301
// will be rounded up to an integer number of iterations under
4301
4302
// FoldTailByMasking. The total cost in that case will be
@@ -4304,20 +4305,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4304
4305
// some extra overheads, but for the purpose of comparing the costs of
4305
4306
// different VFs we can use this to compare the total loop-body cost
4306
4307
// expected after vectorization.
4307
- if (CM.foldTailByMasking())
4308
- return VectorCost * divideCeil(MaxTripCount, VF);
4309
- return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
4308
+ if (HasTail)
4309
+ return VectorCost * (MaxTripCount / VF) +
4310
+ ScalarCost * (MaxTripCount % VF);
4311
+ return VectorCost * divideCeil(MaxTripCount, VF);
4310
4312
};
4311
4313
4312
4314
auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost);
4313
4315
auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost);
4314
4316
return CmpFn(RTCostA, RTCostB);
4315
4317
}
4316
4318
4317
// Wrapper overload of isMoreProfitable. It takes no trip count from the
// caller: it reads one from PSE.getSmallConstantMaxTripCount() and forwards
// A, B and that value to the overload that accepts an explicit MaxTripCount.
// In the added (+) form it also accepts HasTail and passes it through
// unchanged; the removed (-) form had no such parameter.
// NOTE(review): HasTail presumably means "a scalar remainder loop exists",
// judging by call sites passing P->hasScalarTail() or
// !CM.foldTailByMasking() -- confirm against the header declaration.
- bool LoopVectorizationPlanner::isMoreProfitable(
4318
- const VectorizationFactor &A, const VectorizationFactor &B) const {
4319
+ bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
4320
+ const VectorizationFactor &B,
4321
+ bool HasTail) const {
4319
4322
const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount();
4320
- return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount);
4323
+ return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount,
4324
+ HasTail);
4321
4325
}
4322
4326
4323
4327
void LoopVectorizationPlanner::emitInvalidCostRemarks(
@@ -4607,7 +4611,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4607
4611
continue;
4608
4612
}
4609
4613
4610
- if (isMoreProfitable(Candidate, ChosenFactor))
4614
+ if (isMoreProfitable(Candidate, ChosenFactor, P->hasScalarTail() ))
4611
4615
ChosenFactor = Candidate;
4612
4616
}
4613
4617
}
@@ -4621,7 +4625,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4621
4625
}
4622
4626
4623
4627
LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
4624
- !isMoreProfitable(ChosenFactor, ScalarCost)) dbgs()
4628
+ !isMoreProfitable(ChosenFactor, ScalarCost,
4629
+ !CM.foldTailByMasking())) dbgs()
4625
4630
<< "LV: Vectorization seems to be not beneficial, "
4626
4631
<< "but was forced by a user.\n");
4627
4632
return ChosenFactor;
@@ -4713,7 +4718,8 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4713
4718
4714
4719
if (EpilogueVectorizationForceVF > 1) {
4715
4720
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n");
4716
- ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
4721
+ ElementCount ForcedEC =
4722
+ ElementCount::getFixed(EpilogueVectorizationForceVF);
4717
4723
if (hasPlanWithVF(ForcedEC))
4718
4724
return {ForcedEC, 0, 0};
4719
4725
@@ -4787,7 +4793,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4787
4793
}
4788
4794
4789
4795
if (Result.Width.isScalar() ||
4790
- isMoreProfitable(NextVF, Result, MaxTripCount))
4796
+ isMoreProfitable(NextVF, Result, MaxTripCount, !CM.foldTailByMasking() ))
4791
4797
Result = NextVF;
4792
4798
}
4793
4799
@@ -7540,11 +7546,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7540
7546
7541
7547
InstructionCost Cost = cost(*P, VF);
7542
7548
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
7543
- if (isMoreProfitable(CurrentFactor, BestFactor))
7549
+ if (isMoreProfitable(CurrentFactor, BestFactor, P->hasScalarTail() ))
7544
7550
BestFactor = CurrentFactor;
7545
7551
7546
7552
// If profitable add it to ProfitableVF list.
7547
- if (isMoreProfitable(CurrentFactor, ScalarFactor))
7553
+ if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail() ))
7548
7554
ProfitableVFs.push_back(CurrentFactor);
7549
7555
}
7550
7556
}
0 commit comments