Skip to content

Commit 20c8f58

Browse files
committed
[FuncSpec] Split the specialization bonus into CodeSize and Latency.
Currently we use a combined metric TargetTransformInfo::TCK_SizeAndLatency when estimating the specialization bonus. This is suboptimal, and in some cases erroneous. For example, we shouldn't be weighting the codesize decrease attributed to constant propagation by the block frequency of the dead code. Instead, only the latency savings should be weighted by block frequency. The total codesize savings from all the specialization arguments should be deducted from the specialization cost. Differential Revision: https://reviews.llvm.org/D155103
1 parent 5e8b44c commit 20c8f58

File tree

3 files changed

+174
-128
lines changed

3 files changed

+174
-128
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,49 @@ struct Spec {
108108
SpecSig Sig;
109109

110110
// Profitability of the specialization.
111-
Cost Score;
111+
unsigned Score;
112112

113113
// List of call sites, matching this specialization.
114114
SmallVector<CallBase *> CallSites;
115115

116-
Spec(Function *F, const SpecSig &S, Cost Score)
116+
Spec(Function *F, const SpecSig &S, unsigned Score)
117117
: F(F), Sig(S), Score(Score) {}
118-
Spec(Function *F, const SpecSig &&S, Cost Score)
118+
Spec(Function *F, const SpecSig &&S, unsigned Score)
119119
: F(F), Sig(S), Score(Score) {}
120120
};
121121

122+
struct Bonus {
123+
unsigned CodeSize = 0;
124+
unsigned Latency = 0;
125+
126+
Bonus() = default;
127+
128+
Bonus(Cost CodeSize, Cost Latency) {
129+
int64_t Sz = *CodeSize.getValue();
130+
int64_t Ltc = *Latency.getValue();
131+
132+
assert(Sz >= 0 && Ltc >= 0 && "CodeSize and Latency cannot be negative");
133+
// The assert above rules out negative values; the narrowing cast
134+
// from int64_t additionally assumes both values fit in an unsigned.
135+
this->CodeSize = static_cast<unsigned>(Sz);
136+
this->Latency = static_cast<unsigned>(Ltc);
137+
}
138+
139+
Bonus &operator+=(const Bonus RHS) {
140+
CodeSize += RHS.CodeSize;
141+
Latency += RHS.Latency;
142+
return *this;
143+
}
144+
145+
Bonus operator+(const Bonus RHS) const {
146+
return Bonus(CodeSize + RHS.CodeSize, Latency + RHS.Latency);
147+
}
148+
149+
bool operator==(const Bonus RHS) const {
150+
return CodeSize == RHS.CodeSize && Latency == RHS.Latency;
151+
}
152+
};
153+
122154
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
123155
const DataLayout &DL;
124156
BlockFrequencyInfo &BFI;
@@ -143,10 +175,10 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
143175
TargetTransformInfo &TTI, SCCPSolver &Solver)
144176
: DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
145177

146-
Cost getUserBonus(Instruction *User, Value *Use = nullptr,
147-
Constant *C = nullptr);
178+
Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
179+
Constant *C = nullptr);
148180

149-
Cost getBonusFromPendingPHIs();
181+
Bonus getBonusFromPendingPHIs();
150182

151183
private:
152184
friend class InstVisitor<InstCostVisitor, Constant *>;
@@ -208,8 +240,8 @@ class FunctionSpecializer {
208240
}
209241

210242
/// Compute a bonus for replacing argument \p A with constant \p C.
211-
Cost getSpecializationBonus(Argument *A, Constant *C,
212-
InstCostVisitor &Visitor);
243+
Bonus getSpecializationBonus(Argument *A, Constant *C,
244+
InstCostVisitor &Visitor);
213245

214246
private:
215247
Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
@@ -236,7 +268,7 @@ class FunctionSpecializer {
236268
/// @param AllSpecs A vector to add potential specializations to.
237269
/// @param SM A map for a function's specialisation range
238270
/// @return True, if any potential specializations were found
239-
bool findSpecializations(Function *F, Cost SpecCost,
271+
bool findSpecializations(Function *F, unsigned SpecCost,
240272
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
241273

242274
bool isCandidateFunction(Function *F);

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 67 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -101,29 +101,21 @@ static cl::opt<bool> SpecializeLiteralConstant(
101101
"Enable specialization of functions that take a literal constant as an "
102102
"argument"));
103103

104-
// Estimates the instruction cost of all the basic blocks in \p WorkList.
105-
// The successors of such blocks are added to the list as long as they are
106-
// executable and they have a unique predecessor. \p WorkList represents
107-
// the basic blocks of a specialization which become dead once we replace
108-
// instructions that are known to be constants. The aim here is to estimate
109-
// the combination of size and latency savings in comparison to the non
110-
// specialized version of the function.
104+
// Estimates the codesize savings due to dead code after constant propagation.
105+
// \p WorkList represents the basic blocks of a specialization which will
106+
// eventually become dead once we replace instructions that are known to be
107+
// constants. The successors of such blocks are added to the list as long as
108+
// the \p Solver found they were executable prior to specialization, and only
109+
// if they have a unique predecessor.
111110
static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
112111
DenseSet<BasicBlock *> &DeadBlocks,
113112
ConstMap &KnownConstants, SCCPSolver &Solver,
114-
BlockFrequencyInfo &BFI,
115113
TargetTransformInfo &TTI) {
116-
Cost Bonus = 0;
117-
114+
Cost CodeSize = 0;
118115
// Accumulate the codesize cost of the instructions in each basic block.
119116
while (!WorkList.empty()) {
120117
BasicBlock *BB = WorkList.pop_back_val();
121118

122-
uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
123-
BFI.getEntryFreq();
124-
if (!Weight)
125-
continue;
126-
127119
// These blocks are considered dead as far as the InstCostVisitor is
128120
// concerned. They haven't been proven dead yet by the Solver, but
129121
// may become if we propagate the constant specialization arguments.
@@ -139,11 +131,11 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
139131
if (KnownConstants.contains(&I))
140132
continue;
141133

142-
Bonus += Weight *
143-
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
134+
Cost C = TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
144135

145-
LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
146-
<< " after user " << I << "\n");
136+
LLVM_DEBUG(dbgs() << "FnSpecialization: CodeSize " << C
137+
<< " for user " << I << "\n");
138+
CodeSize += C;
147139
}
148140

149141
// Keep adding dead successors to the list as long as they are
@@ -153,7 +145,7 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
153145
SuccBB->getUniquePredecessor() == BB)
154146
WorkList.push_back(SuccBB);
155147
}
156-
return Bonus;
148+
return CodeSize;
157149
}
158150

159151
static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
@@ -164,49 +156,51 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
164156
return nullptr;
165157
}
166158

167-
Cost InstCostVisitor::getBonusFromPendingPHIs() {
168-
Cost Bonus = 0;
159+
Bonus InstCostVisitor::getBonusFromPendingPHIs() {
160+
Bonus B;
169161
while (!PendingPHIs.empty()) {
170162
Instruction *Phi = PendingPHIs.pop_back_val();
171-
Bonus += getUserBonus(Phi);
163+
B += getUserBonus(Phi);
172164
}
173-
return Bonus;
165+
return B;
174166
}
175167

176-
Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
168+
Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
177169
// Cache the iterator before visiting.
178170
LastVisited = Use ? KnownConstants.insert({Use, C}).first
179171
: KnownConstants.end();
180172

181-
if (auto *I = dyn_cast<SwitchInst>(User))
182-
return estimateSwitchInst(*I);
183-
184-
if (auto *I = dyn_cast<BranchInst>(User))
185-
return estimateBranchInst(*I);
186-
187-
C = visit(*User);
188-
if (!C)
189-
return 0;
173+
Cost CodeSize = 0;
174+
if (auto *I = dyn_cast<SwitchInst>(User)) {
175+
CodeSize = estimateSwitchInst(*I);
176+
} else if (auto *I = dyn_cast<BranchInst>(User)) {
177+
CodeSize = estimateBranchInst(*I);
178+
} else {
179+
C = visit(*User);
180+
if (!C)
181+
return {0, 0};
182+
KnownConstants.insert({User, C});
183+
}
190184

191-
KnownConstants.insert({User, C});
185+
CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
192186

193187
uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
194188
BFI.getEntryFreq();
195-
if (!Weight)
196-
return 0;
197189

198-
Cost Bonus = Weight *
199-
TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
190+
Cost Latency = Weight *
191+
TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
200192

201-
LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
202-
<< " for user " << *User << "\n");
193+
LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
194+
<< ", Latency = " << Latency << "} for user "
195+
<< *User << "\n");
203196

197+
Bonus B(CodeSize, Latency);
204198
for (auto *U : User->users())
205199
if (auto *UI = dyn_cast<Instruction>(U))
206200
if (UI != User && Solver.isBlockExecutable(UI->getParent()))
207-
Bonus += getUserBonus(UI, User, C);
201+
B += getUserBonus(UI, User, C);
208202

209-
return Bonus;
203+
return B;
210204
}
211205

212206
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -232,8 +226,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
232226
WorkList.push_back(BB);
233227
}
234228

235-
return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
236-
TTI);
229+
return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
237230
}
238231

239232
Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
@@ -250,8 +243,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
250243
Succ->getUniquePredecessor() == I.getParent())
251244
WorkList.push_back(Succ);
252245

253-
return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
254-
TTI);
246+
return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, TTI);
255247
}
256248

257249
Constant *InstCostVisitor::visitPHINode(PHINode &I) {
@@ -572,13 +564,18 @@ bool FunctionSpecializer::run() {
572564
if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
573565
continue;
574566

567+
int64_t Sz = *Metrics.NumInsts.getValue();
568+
assert(Sz > 0 && "CodeSize should be positive");
569+
// NumInsts is positive (asserted above) and assumed to fit in an unsigned.
570+
unsigned SpecCost = static_cast<unsigned>(Sz);
571+
575572
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
576-
<< F.getName() << " is " << Metrics.NumInsts << "\n");
573+
<< F.getName() << " is " << SpecCost << "\n");
577574

578575
if (Inserted && Metrics.isRecursive)
579576
promoteConstantStackValues(&F);
580577

581-
if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) {
578+
if (!findSpecializations(&F, SpecCost, AllSpecs, SM)) {
582579
LLVM_DEBUG(
583580
dbgs() << "FnSpecialization: No possible specializations found for "
584581
<< F.getName() << "\n");
@@ -713,7 +710,7 @@ static Function *cloneCandidateFunction(Function *F) {
713710
return Clone;
714711
}
715712

716-
bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
713+
bool FunctionSpecializer::findSpecializations(Function *F, unsigned SpecCost,
717714
SmallVectorImpl<Spec> &AllSpecs,
718715
SpecMap &SM) {
719716
// A mapping from a specialisation signature to the index of the respective
@@ -779,21 +776,22 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
779776
AllSpecs[Index].CallSites.push_back(&CS);
780777
} else {
781778
// Calculate the specialisation gain.
782-
Cost Score = 0;
779+
Bonus B;
783780
InstCostVisitor Visitor = getInstCostVisitorFor(F);
784781
for (ArgInfo &A : S.Args)
785-
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
786-
Score += Visitor.getBonusFromPendingPHIs();
782+
B += getSpecializationBonus(A.Formal, A.Actual, Visitor);
783+
B += Visitor.getBonusFromPendingPHIs();
787784

788-
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = "
789-
<< Score << "\n");
785+
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score {CodeSize = "
786+
<< B.CodeSize << ", Latency = " << B.Latency
787+
<< "}\n");
790788

791789
// Discard unprofitable specialisations.
792-
if (!ForceSpecialization && Score <= SpecCost)
790+
if (!ForceSpecialization && B.Latency <= SpecCost - B.CodeSize)
793791
continue;
794792

795793
// Create a new specialisation entry.
796-
auto &Spec = AllSpecs.emplace_back(F, S, Score);
794+
auto &Spec = AllSpecs.emplace_back(F, S, B.Latency);
797795
if (CS.getFunction() != F)
798796
Spec.CallSites.push_back(&CS);
799797
const unsigned Index = AllSpecs.size() - 1;
@@ -860,19 +858,20 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
860858
}
861859

862860
/// Compute a bonus for replacing argument \p A with constant \p C.
863-
Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
861+
Bonus FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
864862
InstCostVisitor &Visitor) {
865863
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
866864
<< C->getNameOrAsOperand() << "\n");
867865

868-
Cost TotalCost = 0;
866+
Bonus B;
869867
for (auto *U : A->users())
870868
if (auto *UI = dyn_cast<Instruction>(U))
871869
if (Solver.isBlockExecutable(UI->getParent()))
872-
TotalCost += Visitor.getUserBonus(UI, A, C);
870+
B += Visitor.getUserBonus(UI, A, C);
873871

874-
LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus "
875-
<< TotalCost << " for argument " << *A << "\n");
872+
LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
873+
<< B.CodeSize << ", Latency = " << B.Latency
874+
<< "} for argument " << *A << "\n");
876875

877876
// The below heuristic is only concerned with exposing inlining
878877
// opportunities via indirect call promotion. If the argument is not a
@@ -882,7 +881,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
882881
// while traversing the users of the specialization arguments ?
883882
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
884883
if (!CalledFunction)
885-
return TotalCost;
884+
return B;
886885

887886
// Get TTI for the called function (used for the inline cost).
888887
auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -892,7 +891,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
892891
// calls to be promoted to direct calls. If the indirect call promotion
893892
// would likely enable the called function to be inlined, specializing is a
894893
// good idea.
895-
int Bonus = 0;
894+
int InliningBonus = 0;
896895
for (User *U : A->users()) {
897896
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
898897
continue;
@@ -919,15 +918,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
919918
// We clamp the bonus for this call to be between zero and the default
920919
// threshold.
921920
if (IC.isAlways())
922-
Bonus += Params.DefaultThreshold;
921+
InliningBonus += Params.DefaultThreshold;
923922
else if (IC.isVariable() && IC.getCostDelta() > 0)
924-
Bonus += IC.getCostDelta();
923+
InliningBonus += IC.getCostDelta();
925924

926-
LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << Bonus
925+
LLVM_DEBUG(dbgs() << "FnSpecialization: Inlining bonus " << InliningBonus
927926
<< " for user " << *U << "\n");
928927
}
929928

930-
return TotalCost + Bonus;
929+
return B += {0, InliningBonus};
931930
}
932931

933932
/// Determine if it is possible to specialise the function for constant values

0 commit comments

Comments
 (0)