@@ -101,29 +101,21 @@ static cl::opt<bool> SpecializeLiteralConstant(
101
101
" Enable specialization of functions that take a literal constant as an "
102
102
" argument" ));
103
103
104
- // Estimates the instruction cost of all the basic blocks in \p WorkList.
105
- // The successors of such blocks are added to the list as long as they are
106
- // executable and they have a unique predecessor. \p WorkList represents
107
- // the basic blocks of a specialization which become dead once we replace
108
- // instructions that are known to be constants. The aim here is to estimate
109
- // the combination of size and latency savings in comparison to the non
110
- // specialized version of the function.
104
+ // Estimates the codesize savings due to dead code after constant propagation.
105
+ // \p WorkList represents the basic blocks of a specialization which will
106
+ // eventually become dead once we replace instructions that are known to be
107
+ // constants. The successors of such blocks are added to the list as long as
108
+ // the \p Solver found they were executable prior to specialization, and only
109
+ // if they have a unique predecessor.
111
110
static Cost estimateBasicBlocks (SmallVectorImpl<BasicBlock *> &WorkList,
112
111
DenseSet<BasicBlock *> &DeadBlocks,
113
112
ConstMap &KnownConstants, SCCPSolver &Solver,
114
- BlockFrequencyInfo &BFI,
115
113
TargetTransformInfo &TTI) {
116
- Cost Bonus = 0 ;
117
-
114
+ Cost CodeSize = 0 ;
118
115
// Accumulate the code size cost of the instructions in each basic block.
119
116
while (!WorkList.empty ()) {
120
117
BasicBlock *BB = WorkList.pop_back_val ();
121
118
122
- uint64_t Weight = BFI.getBlockFreq (BB).getFrequency () /
123
- BFI.getEntryFreq ();
124
- if (!Weight)
125
- continue ;
126
-
127
119
// These blocks are considered dead as far as the InstCostVisitor is
128
120
// concerned. They haven't been proven dead yet by the Solver, but
129
121
// may become dead if we propagate the constant specialization arguments.
@@ -139,11 +131,11 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
139
131
if (KnownConstants.contains (&I))
140
132
continue ;
141
133
142
- Bonus += Weight *
143
- TTI.getInstructionCost (&I, TargetTransformInfo::TCK_SizeAndLatency);
134
+ Cost C = TTI.getInstructionCost (&I, TargetTransformInfo::TCK_CodeSize);
144
135
145
- LLVM_DEBUG (dbgs () << " FnSpecialization: Bonus " << Bonus
146
- << " after user " << I << " \n " );
136
+ LLVM_DEBUG (dbgs () << " FnSpecialization: CodeSize " << C
137
+ << " for user " << I << " \n " );
138
+ CodeSize += C;
147
139
}
148
140
149
141
// Keep adding dead successors to the list as long as they are
@@ -153,7 +145,7 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
153
145
SuccBB->getUniquePredecessor () == BB)
154
146
WorkList.push_back (SuccBB);
155
147
}
156
- return Bonus ;
148
+ return CodeSize ;
157
149
}
158
150
159
151
static Constant *findConstantFor (Value *V, ConstMap &KnownConstants) {
@@ -164,49 +156,51 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
164
156
return nullptr ;
165
157
}
166
158
167
- Cost InstCostVisitor::getBonusFromPendingPHIs () {
168
- Cost Bonus = 0 ;
159
+ Bonus InstCostVisitor::getBonusFromPendingPHIs () {
160
+ Bonus B ;
169
161
while (!PendingPHIs.empty ()) {
170
162
Instruction *Phi = PendingPHIs.pop_back_val ();
171
- Bonus += getUserBonus (Phi);
163
+ B += getUserBonus (Phi);
172
164
}
173
- return Bonus ;
165
+ return B ;
174
166
}
175
167
176
- Cost InstCostVisitor::getUserBonus (Instruction *User, Value *Use, Constant *C) {
168
+ Bonus InstCostVisitor::getUserBonus (Instruction *User, Value *Use, Constant *C) {
177
169
// Cache the iterator before visiting.
178
170
LastVisited = Use ? KnownConstants.insert ({Use, C}).first
179
171
: KnownConstants.end ();
180
172
181
- if (auto *I = dyn_cast<SwitchInst>(User))
182
- return estimateSwitchInst (*I);
183
-
184
- if (auto *I = dyn_cast<BranchInst>(User))
185
- return estimateBranchInst (*I);
186
-
187
- C = visit (*User);
188
- if (!C)
189
- return 0 ;
173
+ Cost CodeSize = 0 ;
174
+ if (auto *I = dyn_cast<SwitchInst>(User)) {
175
+ CodeSize = estimateSwitchInst (*I);
176
+ } else if (auto *I = dyn_cast<BranchInst>(User)) {
177
+ CodeSize = estimateBranchInst (*I);
178
+ } else {
179
+ C = visit (*User);
180
+ if (!C)
181
+ return {0 , 0 };
182
+ KnownConstants.insert ({User, C});
183
+ }
190
184
191
- KnownConstants. insert ({ User, C} );
185
+ CodeSize += TTI. getInstructionCost ( User, TargetTransformInfo::TCK_CodeSize );
192
186
193
187
uint64_t Weight = BFI.getBlockFreq (User->getParent ()).getFrequency () /
194
188
BFI.getEntryFreq ();
195
- if (!Weight)
196
- return 0 ;
197
189
198
- Cost Bonus = Weight *
199
- TTI.getInstructionCost (User, TargetTransformInfo::TCK_SizeAndLatency );
190
+ Cost Latency = Weight *
191
+ TTI.getInstructionCost (User, TargetTransformInfo::TCK_Latency );
200
192
201
- LLVM_DEBUG (dbgs () << " FnSpecialization: Bonus " << Bonus
202
- << " for user " << *User << " \n " );
193
+ LLVM_DEBUG (dbgs () << " FnSpecialization: {CodeSize = " << CodeSize
194
+ << " , Latency = " << Latency << " } for user "
195
+ << *User << " \n " );
203
196
197
+ Bonus B (CodeSize, Latency);
204
198
for (auto *U : User->users ())
205
199
if (auto *UI = dyn_cast<Instruction>(U))
206
200
if (UI != User && Solver.isBlockExecutable (UI->getParent ()))
207
- Bonus += getUserBonus (UI, User, C);
201
+ B += getUserBonus (UI, User, C);
208
202
209
- return Bonus ;
203
+ return B ;
210
204
}
211
205
212
206
Cost InstCostVisitor::estimateSwitchInst (SwitchInst &I) {
@@ -232,8 +226,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
232
226
WorkList.push_back (BB);
233
227
}
234
228
235
- return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, BFI,
236
- TTI);
229
+ return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, TTI);
237
230
}
238
231
239
232
Cost InstCostVisitor::estimateBranchInst (BranchInst &I) {
@@ -250,8 +243,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
250
243
Succ->getUniquePredecessor () == I.getParent ())
251
244
WorkList.push_back (Succ);
252
245
253
- return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, BFI,
254
- TTI);
246
+ return estimateBasicBlocks (WorkList, DeadBlocks, KnownConstants, Solver, TTI);
255
247
}
256
248
257
249
Constant *InstCostVisitor::visitPHINode (PHINode &I) {
@@ -572,13 +564,18 @@ bool FunctionSpecializer::run() {
572
564
if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
573
565
continue ;
574
566
567
+ int64_t Sz = *Metrics.NumInsts .getValue ();
568
+ assert (Sz > 0 && " CodeSize should be positive" );
569
+ // It is safe to down cast from int64_t, NumInsts is always positive.
570
+ unsigned SpecCost = static_cast <unsigned >(Sz);
571
+
575
572
LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization cost for "
576
- << F.getName () << " is " << Metrics. NumInsts << " \n " );
573
+ << F.getName () << " is " << SpecCost << " \n " );
577
574
578
575
if (Inserted && Metrics.isRecursive )
579
576
promoteConstantStackValues (&F);
580
577
581
- if (!findSpecializations (&F, Metrics. NumInsts , AllSpecs, SM)) {
578
+ if (!findSpecializations (&F, SpecCost , AllSpecs, SM)) {
582
579
LLVM_DEBUG (
583
580
dbgs () << " FnSpecialization: No possible specializations found for "
584
581
<< F.getName () << " \n " );
@@ -713,7 +710,7 @@ static Function *cloneCandidateFunction(Function *F) {
713
710
return Clone;
714
711
}
715
712
716
- bool FunctionSpecializer::findSpecializations (Function *F, Cost SpecCost,
713
+ bool FunctionSpecializer::findSpecializations (Function *F, unsigned SpecCost,
717
714
SmallVectorImpl<Spec> &AllSpecs,
718
715
SpecMap &SM) {
719
716
// A mapping from a specialisation signature to the index of the respective
@@ -779,21 +776,22 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
779
776
AllSpecs[Index].CallSites .push_back (&CS);
780
777
} else {
781
778
// Calculate the specialisation gain.
782
- Cost Score = 0 ;
779
+ Bonus B ;
783
780
InstCostVisitor Visitor = getInstCostVisitorFor (F);
784
781
for (ArgInfo &A : S.Args )
785
- Score += getSpecializationBonus (A.Formal , A.Actual , Visitor);
786
- Score += Visitor.getBonusFromPendingPHIs ();
782
+ B += getSpecializationBonus (A.Formal , A.Actual , Visitor);
783
+ B += Visitor.getBonusFromPendingPHIs ();
787
784
788
- LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization score = "
789
- << Score << " \n " );
785
+ LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization score {CodeSize = "
786
+ << B.CodeSize << " , Latency = " << B.Latency
787
+ << " }\n " );
790
788
791
789
// Discard unprofitable specialisations.
792
- if (!ForceSpecialization && Score <= SpecCost)
790
+ if (!ForceSpecialization && B. Latency <= SpecCost - B. CodeSize )
793
791
continue ;
794
792
795
793
// Create a new specialisation entry.
796
- auto &Spec = AllSpecs.emplace_back (F, S, Score );
794
+ auto &Spec = AllSpecs.emplace_back (F, S, B. Latency );
797
795
if (CS.getFunction () != F)
798
796
Spec.CallSites .push_back (&CS);
799
797
const unsigned Index = AllSpecs.size () - 1 ;
@@ -860,19 +858,20 @@ Function *FunctionSpecializer::createSpecialization(Function *F,
860
858
}
861
859
862
860
// / Compute a bonus for replacing argument \p A with constant \p C.
863
- Cost FunctionSpecializer::getSpecializationBonus (Argument *A, Constant *C,
861
+ Bonus FunctionSpecializer::getSpecializationBonus (Argument *A, Constant *C,
864
862
InstCostVisitor &Visitor) {
865
863
LLVM_DEBUG (dbgs () << " FnSpecialization: Analysing bonus for constant: "
866
864
<< C->getNameOrAsOperand () << " \n " );
867
865
868
- Cost TotalCost = 0 ;
866
+ Bonus B ;
869
867
for (auto *U : A->users ())
870
868
if (auto *UI = dyn_cast<Instruction>(U))
871
869
if (Solver.isBlockExecutable (UI->getParent ()))
872
- TotalCost += Visitor.getUserBonus (UI, A, C);
870
+ B += Visitor.getUserBonus (UI, A, C);
873
871
874
- LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated user bonus "
875
- << TotalCost << " for argument " << *A << " \n " );
872
+ LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated bonus {CodeSize = "
873
+ << B.CodeSize << " , Latency = " << B.Latency
874
+ << " } for argument " << *A << " \n " );
876
875
877
876
// The below heuristic is only concerned with exposing inlining
878
877
// opportunities via indirect call promotion. If the argument is not a
@@ -882,7 +881,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
882
881
// while traversing the users of the specialization arguments ?
883
882
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts ());
884
883
if (!CalledFunction)
885
- return TotalCost ;
884
+ return B ;
886
885
887
886
// Get TTI for the called function (used for the inline cost).
888
887
auto &CalleeTTI = (GetTTI)(*CalledFunction);
@@ -892,7 +891,7 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
892
891
// calls to be promoted to direct calls. If the indirect call promotion
893
892
// would likely enable the called function to be inlined, specializing is a
894
893
// good idea.
895
- int Bonus = 0 ;
894
+ int InliningBonus = 0 ;
896
895
for (User *U : A->users ()) {
897
896
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
898
897
continue ;
@@ -919,15 +918,15 @@ Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
919
918
// We clamp the bonus for this call to be between zero and the default
920
919
// threshold.
921
920
if (IC.isAlways ())
922
- Bonus += Params.DefaultThreshold ;
921
+ InliningBonus += Params.DefaultThreshold ;
923
922
else if (IC.isVariable () && IC.getCostDelta () > 0 )
924
- Bonus += IC.getCostDelta ();
923
+ InliningBonus += IC.getCostDelta ();
925
924
926
- LLVM_DEBUG (dbgs () << " FnSpecialization: Inlining bonus " << Bonus
925
+ LLVM_DEBUG (dbgs () << " FnSpecialization: Inlining bonus " << InliningBonus
927
926
<< " for user " << *U << " \n " );
928
927
}
929
928
930
- return TotalCost + Bonus ;
929
+ return B += { 0 , InliningBonus} ;
931
930
}
932
931
933
932
// / Determine if it is possible to specialise the function for constant values
0 commit comments