@@ -4256,9 +4256,10 @@ static unsigned getEstimatedRuntimeVF(ElementCount VF,
4256
4256
return EstimatedVF;
4257
4257
}
4258
4258
4259
- bool LoopVectorizationPlanner::isMoreProfitable (
4260
- const VectorizationFactor &A, const VectorizationFactor &B,
4261
- const unsigned MaxTripCount) const {
4259
+ bool LoopVectorizationPlanner::isMoreProfitable (const VectorizationFactor &A,
4260
+ const VectorizationFactor &B,
4261
+ const unsigned MaxTripCount,
4262
+ bool HasTail) const {
4262
4263
InstructionCost CostA = A.Cost ;
4263
4264
InstructionCost CostB = B.Cost ;
4264
4265
@@ -4296,9 +4297,9 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4296
4297
if (!MaxTripCount)
4297
4298
return CmpFn (CostA * EstimatedWidthB, CostB * EstimatedWidthA);
4298
4299
4299
- auto GetCostForTC = [MaxTripCount, this ](unsigned VF,
4300
- InstructionCost VectorCost,
4301
- InstructionCost ScalarCost) {
4300
+ auto GetCostForTC = [MaxTripCount, HasTail ](unsigned VF,
4301
+ InstructionCost VectorCost,
4302
+ InstructionCost ScalarCost) {
4302
4303
// If the trip count is a known (possibly small) constant, the trip count
4303
4304
// will be rounded up to an integer number of iterations under
4304
4305
// FoldTailByMasking. The total cost in that case will be
@@ -4307,20 +4308,23 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4307
4308
// some extra overheads, but for the purpose of comparing the costs of
4308
4309
// different VFs we can use this to compare the total loop-body cost
4309
4310
// expected after vectorization.
4310
- if (CM.foldTailByMasking ())
4311
- return VectorCost * divideCeil (MaxTripCount, VF);
4312
- return VectorCost * (MaxTripCount / VF) + ScalarCost * (MaxTripCount % VF);
4311
+ if (HasTail)
4312
+ return VectorCost * (MaxTripCount / VF) +
4313
+ ScalarCost * (MaxTripCount % VF);
4314
+ return VectorCost * divideCeil (MaxTripCount, VF);
4313
4315
};
4314
4316
4315
4317
auto RTCostA = GetCostForTC (EstimatedWidthA, CostA, A.ScalarCost );
4316
4318
auto RTCostB = GetCostForTC (EstimatedWidthB, CostB, B.ScalarCost );
4317
4319
return CmpFn (RTCostA, RTCostB);
4318
4320
}
4319
4321
4320
// Convenience overload of isMoreProfitable that takes no explicit trip count.
// It reads the small constant max trip count from PSE and forwards A, B, that
// trip count and (in the added lines) HasTail to the overload that accepts a
// MaxTripCount argument. The removed lines show the previous form, which had
// no HasTail parameter and forwarded only A, B and MaxTripCount.
- bool LoopVectorizationPlanner::isMoreProfitable (
4321
- const VectorizationFactor &A, const VectorizationFactor &B) const {
4322
+ bool LoopVectorizationPlanner::isMoreProfitable (const VectorizationFactor &A,
4323
+ const VectorizationFactor &B,
4324
+ bool HasTail) const {
4322
4325
const unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4323
- return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount);
4326
+ return LoopVectorizationPlanner::isMoreProfitable (A, B, MaxTripCount,
4327
+ HasTail);
4324
4328
}
4325
4329
4326
4330
void LoopVectorizationPlanner::emitInvalidCostRemarks (
@@ -4609,7 +4613,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4609
4613
continue ;
4610
4614
}
4611
4615
4612
- if (isMoreProfitable (Candidate, ChosenFactor))
4616
+ if (isMoreProfitable (Candidate, ChosenFactor, P-> hasScalarTail () ))
4613
4617
ChosenFactor = Candidate;
4614
4618
}
4615
4619
}
@@ -4623,7 +4627,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
4623
4627
}
4624
4628
4625
4629
LLVM_DEBUG (if (ForceVectorization && !ChosenFactor.Width .isScalar () &&
4626
- !isMoreProfitable (ChosenFactor, ScalarCost)) dbgs ()
4630
+ !isMoreProfitable (ChosenFactor, ScalarCost,
4631
+ !CM.foldTailByMasking ())) dbgs ()
4627
4632
<< " LV: Vectorization seems to be not beneficial, "
4628
4633
<< " but was forced by a user.\n " );
4629
4634
return ChosenFactor;
@@ -4789,7 +4794,7 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
4789
4794
}
4790
4795
4791
4796
if (Result.Width .isScalar () ||
4792
- isMoreProfitable (NextVF, Result, MaxTripCount))
4797
+ isMoreProfitable (NextVF, Result, MaxTripCount, !CM. foldTailByMasking () ))
4793
4798
Result = NextVF;
4794
4799
}
4795
4800
@@ -7768,11 +7773,11 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7768
7773
7769
7774
InstructionCost Cost = cost (*P, VF);
7770
7775
VectorizationFactor CurrentFactor (VF, Cost, ScalarCost);
7771
- if (isMoreProfitable (CurrentFactor, BestFactor))
7776
+ if (isMoreProfitable (CurrentFactor, BestFactor, P-> hasScalarTail () ))
7772
7777
BestFactor = CurrentFactor;
7773
7778
7774
7779
// If profitable add it to ProfitableVF list.
7775
- if (isMoreProfitable (CurrentFactor, ScalarFactor))
7780
+ if (isMoreProfitable (CurrentFactor, ScalarFactor, P-> hasScalarTail () ))
7776
7781
ProfitableVFs.push_back (CurrentFactor);
7777
7782
}
7778
7783
}
0 commit comments