Skip to content

Commit 36fc291

Browse files
committed
[VPlan] Implement VPBlendRecipe::computeCost.
Implement VPBlendRecipe::computeCost. VPBlendRecipe is currently also used if only the first lane is used. This also requires pre-computing costs for forced scalars and instructions considered profitable to scalarize. For those, the cost will be computed separately in the legacy cost model. This will also be needed when implementing VPReplicateRecipe::computeCost.
1 parent a199fb1 commit 36fc291

File tree

5 files changed

+528
-15
lines changed

5 files changed

+528
-15
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,6 +961,8 @@ using InstructionVFPair = std::pair<Instruction *, ElementCount>;
961961
/// TargetTransformInfo to query the different backends for the cost of
962962
/// different operations.
963963
class LoopVectorizationCostModel {
964+
friend class LoopVectorizationPlanner;
965+
964966
public:
965967
LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
966968
PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -7263,6 +7265,32 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
72637265
auto BranchCost = CostCtx.getLegacyCost(BB->getTerminator(), VF);
72647266
Cost += BranchCost;
72657267
}
7268+
7269+
// Pre-compute costs for instructions that are forced-scalar or profitable to
7270+
// scalarize. Their costs will be computed separately in the legacy cost
7271+
// model.
7272+
for (Instruction *ForcedScalar : CM.ForcedScalars[VF]) {
7273+
if (CostCtx.skipCostComputation(ForcedScalar, VF.isVector()))
7274+
continue;
7275+
CostCtx.SkipCostComputation.insert(ForcedScalar);
7276+
InstructionCost ForcedCost = CostCtx.getLegacyCost(ForcedScalar, VF);
7277+
LLVM_DEBUG({
7278+
dbgs() << "Cost of " << ForcedCost << " for VF " << VF
7279+
<< ": forced scalar " << *ForcedScalar << "\n";
7280+
});
7281+
Cost += ForcedCost;
7282+
}
7283+
for (const auto &[Scalarized, ScalarCost] : CM.InstsToScalarize[VF]) {
7284+
if (CostCtx.skipCostComputation(Scalarized, VF.isVector()))
7285+
continue;
7286+
CostCtx.SkipCostComputation.insert(Scalarized);
7287+
LLVM_DEBUG({
7288+
dbgs() << "Cost of " << ScalarCost << " for VF " << VF
7289+
<< ": profitable to scalarize " << *Scalarized << "\n";
7290+
});
7291+
Cost += ScalarCost;
7292+
}
7293+
72667294
return Cost;
72677295
}
72687296

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2241,6 +2241,10 @@ class VPBlendRecipe : public VPSingleDefRecipe {
22412241
/// Generate the phi/select nodes.
22422242
void execute(VPTransformState &State) override;
22432243

2244+
/// Return the cost of this VPBlendRecipe.
2245+
InstructionCost computeCost(ElementCount VF,
2246+
VPCostContext &Ctx) const override;
2247+
22442248
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
22452249
/// Print the recipe.
22462250
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1896,6 +1896,22 @@ void VPBlendRecipe::execute(VPTransformState &State) {
18961896
State.set(this, Result, OnlyFirstLaneUsed);
18971897
}
18981898

1899+
/// Return the cost of this blend recipe for vectorization factor \p VF,
/// queried from the TTI held by \p Ctx with the reciprocal-throughput cost
/// kind. If only the first lane of the result is used, the cost is that of a
/// PHI control-flow instruction; otherwise it is the cost of one select
/// multiplied by (number of incoming values - 1).
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
1900+
VPCostContext &Ctx) const {
1901+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1902+
1903+
// Handle cases where only the first lane is used the same way as the legacy
1904+
// cost model.
1905+
if (vputils::onlyFirstLaneUsed(this))
1906+
return Ctx.TTI.getCFInstrCost(Instruction::PHI, CostKind);
1907+
1908+
// Otherwise charge one select per incoming value beyond the first. The select
// is costed on the inferred scalar result type and an i1 condition type, both
// passed through ToVectorTy with VF.
Type *ResultTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
1909+
Type *CmpTy = ToVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
1910+
return (getNumIncomingValues() - 1) *
1911+
Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
1912+
CmpInst::BAD_ICMP_PREDICATE, CostKind);
1913+
}
1914+
18991915
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
19001916
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
19011917
VPSlotTracker &SlotTracker) const {

0 commit comments

Comments
 (0)