@@ -4256,9 +4256,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
4256
4256
return EstimatedVF;
4257
4257
}
4258
4258
4259
- bool LoopVectorizationPlanner::isMoreProfitable(
4260
- const VectorizationFactor &A, const VectorizationFactor &B,
4261
- const unsigned MaxTripCount) const {
4259
+ bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
4260
+ const VectorizationFactor &B,
4261
+ const unsigned MaxTripCount,
4262
+ bool HasTail) const {
4262
4263
InstructionCost CostA = A.Cost;
4263
4264
InstructionCost CostB = B.Cost;
4264
4265
@@ -4296,9 +4297,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4296
4297
if (!MaxTripCount)
4297
4298
return CmpFn(CostA * EstimatedWidthB, CostB * EstimatedWidthA);
4298
4299
4299
- auto GetCostForTC = [MaxTripCount, this ](unsigned VF,
4300
- InstructionCost VectorCost,
4301
- InstructionCost ScalarCost) {
4300
+ auto GetCostForTC = [MaxTripCount, HasTail ](unsigned VF,
4301
+ InstructionCost VectorCost,
4302
+ InstructionCost ScalarCost) {
4302
4303
// If the trip count is a known (possibly small) constant, the trip count
4303
4304
// will be rounded up to an integer number of iterations under
4304
4305
// FoldTailByMasking. The total cost in that case will be
@@ -4307,20 +4308,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4307
4308
// some extra overheads, but for the purpose of comparing the costs of
4308
4309
// different VFs we can use this to compare the total loop-body cost
4309
4310
// expected after vectorization.
4310
- if (CM.foldTailByMasking())
4311
- return VectorCost * divideCeil(MaxTripCount, VF);
4312
- return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
4311
+ if (HasTail)
4312
+ return VectorCost * (MaxTripCount / VF) +
4313
+ ScalarCost * (MaxTripCount % VF);
4314
+ return VectorCost * divideCeil(MaxTripCount, VF);
4313
4315
};
4314
4316
4315
4317
auto RTCostA = GetCostForTC(EstimatedWidthA, CostA, A.ScalarCost);
4316
4318
auto RTCostB = GetCostForTC(EstimatedWidthB, CostB, B.ScalarCost);
4317
4319
return CmpFn(RTCostA, RTCostB);
4318
4320
}
4319
4321
4320
- bool LoopVectorizationPlanner::isMoreProfitable(
4321
- const VectorizationFactor &A, const VectorizationFactor &B) const {
4322
+ bool LoopVectorizationPlanner::isMoreProfitable(const VectorizationFactor &A,
4323
+ const VectorizationFactor &B,
4324
+ bool HasTail) const {
4322
4325
const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount();
4323
- return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount);
4326
+ return LoopVectorizationPlanner::isMoreProfitable(A, B, MaxTripCount,
4327
+ HasTail);
4324
4328
}
4325
4329
4326
4330
void LoopVectorizationPlanner::emitInvalidCostRemarks(
@@ -4609,7 +4613,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4609
4613
continue;
4610
4614
}
4611
4615
4612
- if (isMoreProfitable(Candidate, ChosenFactor))
4616
+ if (isMoreProfitable(Candidate, ChosenFactor, P->hasScalarTail() ))
4613
4617
ChosenFactor = Candidate;
4614
4618
}
4615
4619
}
@@ -4623,7 +4627,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4623
4627
}
4624
4628
4625
4629
LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
4626
- !isMoreProfitable(ChosenFactor, ScalarCost)) dbgs()
4630
+ !isMoreProfitable(ChosenFactor, ScalarCost,
4631
+ !CM.foldTailByMasking())) dbgs()
4627
4632
<< "LV: Vectorization seems to be not beneficial, "
4628
4633
<< "but was forced by a user.\n");
4629
4634
return ChosenFactor;
@@ -4789,7 +4794,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4789
4794
}
4790
4795
4791
4796
if (Result.Width.isScalar() ||
4792
- isMoreProfitable(NextVF, Result, MaxTripCount))
4797
+ isMoreProfitable(NextVF, Result, MaxTripCount, !CM.foldTailByMasking() ))
4793
4798
Result = NextVF;
4794
4799
}
4795
4800
@@ -7768,11 +7773,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7768
7773
7769
7774
InstructionCost Cost = cost(*P, VF);
7770
7775
VectorizationFactor CurrentFactor(VF, Cost, ScalarCost);
7771
- if (isMoreProfitable(CurrentFactor, BestFactor))
7776
+ if (isMoreProfitable(CurrentFactor, BestFactor, P->hasScalarTail() ))
7772
7777
BestFactor = CurrentFactor;
7773
7778
7774
7779
// If profitable add it to ProfitableVF list.
7775
- if (isMoreProfitable(CurrentFactor, ScalarFactor))
7780
+ if (isMoreProfitable(CurrentFactor, ScalarFactor, P->hasScalarTail() ))
7776
7781
ProfitableVFs.push_back(CurrentFactor);
7777
7782
}
7778
7783
}
0 commit comments