Skip to content

Commit bf293b4

Browse files
committed
[ctx_prof] Add Inlining support
1 parent aaed557 commit bf293b4

File tree

9 files changed

+370
-1
lines changed

9 files changed

+370
-1
lines changed

llvm/include/llvm/Analysis/CtxProfAnalysis.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ class PGOContextualProfile {
6262
bool isFunctionKnown(const Function &F) const {
6363
return getDefinedFunctionGUID(F) != 0;
6464
}
65+
66+
uint32_t getNrCounters(const Function &F) const {
67+
assert(isFunctionKnown(F));
68+
return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex;
69+
}
70+
71+
uint32_t getNrCallsites(const Function &F) const {
72+
assert(isFunctionKnown(F));
73+
return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex;
74+
}
6575

6676
uint32_t allocateNextCounterIndex(const Function &F) {
6777
assert(isFunctionKnown(F));

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,6 +1516,10 @@ class InstrProfInstBase : public IntrinsicInst {
15161516
return const_cast<Value *>(getArgOperand(0))->stripPointerCasts();
15171517
}
15181518

1519+
/// Replaces the name operand of this intrinsic call (argument 0) with \p V.
void setNameValue(Value *V) { setArgOperand(0, V); }
1522+
15191523
// The hash of the CFG for the instrumented function.
15201524
ConstantInt *getHash() const {
15211525
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));

llvm/include/llvm/ProfileData/PGOCtxProfReader.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ class PGOCtxProfContext final {
7474
Iter->second.emplace(Other.guid(), std::move(Other));
7575
}
7676

77+
void ingestAllContexts(uint32_t CSId, CallTargetMapTy &&Other) {
78+
auto [_, Inserted] = callsites().try_emplace(CSId, std::move(Other));
79+
assert(Inserted);
80+
}
81+
7782
/// Resizes the counter storage of this context to \p Size entries.
void resizeCounters(uint32_t Size) {
  Counters.resize(Size);
}
7883

7984
bool hasCallsite(uint32_t I) const {

llvm/include/llvm/Transforms/Utils/Cloning.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/ADT/SmallVector.h"
2121
#include "llvm/ADT/Twine.h"
2222
#include "llvm/Analysis/AssumptionCache.h"
23+
#include "llvm/Analysis/CtxProfAnalysis.h"
2324
#include "llvm/Analysis/InlineCost.h"
2425
#include "llvm/IR/BasicBlock.h"
2526
#include "llvm/IR/ValueHandle.h"
@@ -270,6 +271,14 @@ InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
270271
bool InsertLifetime = true,
271272
Function *ForwardVarArgsTo = nullptr);
272273

274+
/// Same as above, but it will update the contextual profile.
275+
InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
276+
CtxProfAnalysis::Result &CtxProf,
277+
bool MergeAttributes = false,
278+
AAResults *CalleeAAR = nullptr,
279+
bool InsertLifetime = true,
280+
Function *ForwardVarArgsTo = nullptr);
281+
273282
/// Clones a loop \p OrigLoop. Returns the loop and the blocks in \p
274283
/// Blocks.
275284
///

llvm/lib/Analysis/CtxProfAnalysis.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ PGOContextualProfile CtxProfAnalysis::run(Module &M,
150150
// If we made it this far, the Result is valid - which we mark by setting
151151
// .Profiles.
152152
// Trim first the roots that aren't in this module.
153-
DenseSet<GlobalValue::GUID> ProfiledGUIDs;
154153
for (auto &[RootGuid, _] : llvm::make_early_inc_range(*MaybeCtx))
155154
if (!Result.FuncInfo.contains(RootGuid))
156155
MaybeCtx->erase(RootGuid);

llvm/lib/Transforms/IPO/ModuleInliner.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/AliasAnalysis.h"
2121
#include "llvm/Analysis/AssumptionCache.h"
2222
#include "llvm/Analysis/BlockFrequencyInfo.h"
23+
#include "llvm/Analysis/CtxProfAnalysis.h"
2324
#include "llvm/Analysis/InlineAdvisor.h"
2425
#include "llvm/Analysis/InlineCost.h"
2526
#include "llvm/Analysis/InlineOrder.h"

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/Analysis/BlockFrequencyInfo.h"
2424
#include "llvm/Analysis/CallGraph.h"
2525
#include "llvm/Analysis/CaptureTracking.h"
26+
#include "llvm/Analysis/CtxProfAnalysis.h"
2627
#include "llvm/Analysis/IndirectCallVisitor.h"
2728
#include "llvm/Analysis/InstructionSimplify.h"
2829
#include "llvm/Analysis/MemoryProfileInfo.h"
@@ -46,6 +47,7 @@
4647
#include "llvm/IR/Dominators.h"
4748
#include "llvm/IR/EHPersonalities.h"
4849
#include "llvm/IR/Function.h"
50+
#include "llvm/IR/GlobalVariable.h"
4951
#include "llvm/IR/IRBuilder.h"
5052
#include "llvm/IR/InlineAsm.h"
5153
#include "llvm/IR/InstrTypes.h"
@@ -71,6 +73,7 @@
7173
#include <algorithm>
7274
#include <cassert>
7375
#include <cstdint>
76+
#include <deque>
7477
#include <iterator>
7578
#include <limits>
7679
#include <optional>
@@ -2116,6 +2119,168 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
21162119
}
21172120
}
21182121

2122+
static const std::pair<std::vector<int64_t>, std::vector<int64_t>>
2123+
remapIndices(Function &Caller, BasicBlock *StartBB,
2124+
CtxProfAnalysis::Result &CtxProf, uint32_t CalleeCounters,
2125+
uint32_t CalleeCallsites) {
2126+
// We'll allocate a new ID to imported callsite counters and callsites. We're
2127+
// using -1 to indicate a counter we delete. Most likely the entry, for
2128+
// example, will be deleted - we don't want 2 IDs in the same BB, and the
2129+
// entry would have been cloned in the callsite's old BB.
2130+
std::vector<int64_t> CalleeCounterMap;
2131+
std::vector<int64_t> CalleeCallsiteMap;
2132+
CalleeCounterMap.resize(CalleeCounters, -1);
2133+
CalleeCallsiteMap.resize(CalleeCallsites, -1);
2134+
2135+
auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool {
2136+
if (Ins.getNameValue() == &Caller)
2137+
return false;
2138+
const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2139+
if (CalleeCounterMap[OldID] == -1)
2140+
CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(Caller);
2141+
const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]);
2142+
2143+
Ins.setNameValue(&Caller);
2144+
Ins.setIndex(NewID);
2145+
return true;
2146+
};
2147+
2148+
auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins)-> bool {
2149+
if (Ins.getNameValue() == &Caller)
2150+
return false;
2151+
const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue());
2152+
if (CalleeCallsiteMap[OldID] == -1)
2153+
CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(Caller);
2154+
const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]);
2155+
2156+
Ins.setNameValue(&Caller);
2157+
Ins.setIndex(NewID);
2158+
return true;
2159+
};
2160+
2161+
std::deque<BasicBlock*> Worklist;
2162+
DenseSet<const BasicBlock*> Seen;
2163+
// We will traverse the BBs starting from the callsite BB. The callsite BB
2164+
// will have at least a BB ID - maybe its own, and in any case the one coming
2165+
// from the cloned function's entry BB. The other BBs we'll start seeing from
2166+
// there on may or may not have BB IDs. BBs with IDs belonging to our caller
2167+
// are definitely not coming from the imported function and form a boundary
2168+
// past which we don't need to traverse anymore. BBs may have no
2169+
// instrumentation, in which case we'll traverse past them.
2170+
// An invariant we'll keep is that a BB will have at most 1 BB ID. For
2171+
// example, the callsite BB will delete the callee BB's instrumentation. This
2172+
// doesn't result in information loss: the entry BB of the caller will have
2173+
// the same count as the callsite's BB.
2174+
// At the end of this traversal, all the callee's instrumentation would be
2175+
// mapped into the caller's instrumentation index space. Some of the callee's
2176+
// counters may be deleted (as mentioned, this should result in no loss of
2177+
// information).
2178+
Worklist.push_back(StartBB);
2179+
while (!Worklist.empty()) {
2180+
auto *BB = Worklist.front();
2181+
Worklist.pop_front();
2182+
bool Changed = false;
2183+
auto *BBID = CtxProfAnalysis::getBBInstrumentation(*BB);
2184+
if (BBID) {
2185+
Changed |= RewriteInstrIfNeeded(*BBID);
2186+
// this may be the entryblock from the inlined callee, coming into a BB
2187+
// that didn't have instrumentation because of MST decisions. Let's make
2188+
// sure it's placed accordingly. This is a noop elsewhere.
2189+
BBID->moveBefore(&*BB->getFirstInsertionPt());
2190+
}
2191+
for (auto &I : llvm::make_early_inc_range(*BB)) {
2192+
if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) {
2193+
if (Inc != BBID) {
2194+
Inc->eraseFromParent();
2195+
Changed = true;
2196+
}
2197+
} else if (auto *CS = dyn_cast<InstrProfCallsite>(&I)) {
2198+
Changed |= RewriteCallsiteInsIfNeeded(*CS);
2199+
}
2200+
}
2201+
if (!BBID || Changed)
2202+
for (auto *Succ : successors(BB))
2203+
if (Seen.insert(Succ).second)
2204+
Worklist.push_back(Succ);
2205+
}
2206+
return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)};
2207+
}
2208+
2209+
/// Inlines \p CB by delegating to the InlineFunction overload that takes no
/// contextual profile and, if that succeeds, updates \p CtxProf to match.
///
/// On success:
///  - the callsite instrumentation that belonged to \p CB is erased;
///  - the instrumentation brought in from the callee is remapped into the
///    caller's counter / callsite index spaces (see remapIndices);
///  - every caller context handed to the updater by CtxProf.update has its
///    counters resized, and receives the counters and sub-contexts of the
///    callee context that hung off the inlined callsite, if there was one.
///
/// On failure the result of the underlying InlineFunction is returned and the
/// profile is left untouched.
///
/// \p CB must be a direct call that has callsite instrumentation.
llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
                                        CtxProfAnalysis::Result &CtxProf,
                                        bool MergeAttributes,
                                        AAResults *CalleeAAR,
                                        bool InsertLifetime,
                                        Function *ForwardVarArgsTo) {
  auto &Caller = *CB.getCaller();
  auto &Callee = *CB.getCalledFunction();
  auto *StartBB = CB.getParent();

  // Gather everything we need about the callsite and the callee before
  // attempting to inline.
  const auto CalleeGUID = AssignGUIDPass::getGUID(Callee);
  auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB);
  assert(CallsiteIDIns &&
         "Expected the call to be inlined to have callsite instrumentation");
  const auto CallsiteID =
      static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue());

  const auto CalleeCounters = CtxProf.getNrCounters(Callee);
  const auto CalleeCallsites = CtxProf.getNrCallsites(Callee);

  auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime,
                            ForwardVarArgsTo);
  if (!Ret.isSuccess())
    return Ret;

  // We don't have that callsite anymore.
  CallsiteIDIns->eraseFromParent();

  // Assigning Maps and then binding references into it inside the lambda
  // because captured structured bindings are a C++20 extension. We do need
  // the lambda to capture, though.
  const auto Maps =
      remapIndices(Caller, StartBB, CtxProf, CalleeCounters, CalleeCallsites);
  const uint32_t NewCountersSize = CtxProf.getNrCounters(Caller);

  auto Updater = [&](PGOCtxProfContext &Ctx) {
    assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller));
    const auto &[CalleeCounterMap, CalleeCallsiteMap] = Maps;
    assert(
        (Ctx.counters().size() +
             llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) ==
         NewCountersSize) &&
        "The caller's counters size should have grown by the number of new "
        "distinct counters inherited from the inlined callee.");
    Ctx.resizeCounters(NewCountersSize);
    // If the callsite wasn't exercised in this context, the value of the
    // counters coming from it is 0 and so we're done.
    auto CSIt = Ctx.callsites().find(CallsiteID);
    if (CSIt == Ctx.callsites().end())
      return;
    auto CalleeCtxIt = CSIt->second.find(CalleeGUID);
    // The callsite was exercised, but not with this callee (so presumably this
    // is an indirect callsite). Again we're done.
    if (CalleeCtxIt == CSIt->second.end())
      return;
    auto &CalleeCtx = CalleeCtxIt->second;
    assert(CalleeCtx.guid() == CalleeGUID);

    // Copy over the counters that survived the remapping; -1 marks a callee
    // counter that has no counterpart in the caller.
    for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) {
      const int64_t NewIndex = CalleeCounterMap[I];
      if (NewIndex >= 0)
        Ctx.counters()[NewIndex] = CalleeCtx.counters()[I];
    }
    // Hand the callee's sub-contexts over to the caller, under their new
    // callsite indices.
    for (auto &[I, OtherSet] : CalleeCtx.callsites()) {
      const int64_t NewCSIdx = CalleeCallsiteMap[I];
      if (NewCSIdx >= 0)
        Ctx.ingestAllContexts(NewCSIdx, std::move(OtherSet));
    }
    // The inlined callsite is gone from the IR; drop it from the context too.
    auto Deleted = Ctx.callsites().erase(CallsiteID);
    assert(Deleted);
    (void)Deleted;
  };
  CtxProf.update(Updater, &Caller);
  return Ret;
}
2283+
21192284
/// This function inlines the called function into the basic block of the
21202285
/// caller. This returns false if it is not possible to inline this call.
21212286
/// The program is still in a well defined state if this occurs though.

llvm/unittests/Transforms/Utils/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ add_llvm_unittest(UtilsTests
1919
CodeMoverUtilsTest.cpp
2020
DebugifyTest.cpp
2121
FunctionComparatorTest.cpp
22+
InlineFunctionTest.cpp
2223
IntegerDivisionTest.cpp
2324
LocalTest.cpp
2425
LoopRotationUtilsTest.cpp

0 commit comments

Comments
 (0)