|
23 | 23 | #include "llvm/Analysis/BlockFrequencyInfo.h"
|
24 | 24 | #include "llvm/Analysis/CallGraph.h"
|
25 | 25 | #include "llvm/Analysis/CaptureTracking.h"
|
| 26 | +#include "llvm/Analysis/CtxProfAnalysis.h" |
26 | 27 | #include "llvm/Analysis/IndirectCallVisitor.h"
|
27 | 28 | #include "llvm/Analysis/InstructionSimplify.h"
|
28 | 29 | #include "llvm/Analysis/MemoryProfileInfo.h"
|
|
46 | 47 | #include "llvm/IR/Dominators.h"
|
47 | 48 | #include "llvm/IR/EHPersonalities.h"
|
48 | 49 | #include "llvm/IR/Function.h"
|
| 50 | +#include "llvm/IR/GlobalVariable.h" |
49 | 51 | #include "llvm/IR/IRBuilder.h"
|
50 | 52 | #include "llvm/IR/InlineAsm.h"
|
51 | 53 | #include "llvm/IR/InstrTypes.h"
|
|
71 | 73 | #include <algorithm>
|
72 | 74 | #include <cassert>
|
73 | 75 | #include <cstdint>
|
| 76 | +#include <deque> |
74 | 77 | #include <iterator>
|
75 | 78 | #include <limits>
|
76 | 79 | #include <optional>
|
@@ -2116,6 +2119,168 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
|
2116 | 2119 | }
|
2117 | 2120 | }
|
2118 | 2121 |
|
| 2122 | +static const std::pair<std::vector<int64_t>, std::vector<int64_t>> |
| 2123 | +remapIndices(Function &Caller, BasicBlock *StartBB, |
| 2124 | + CtxProfAnalysis::Result &CtxProf, uint32_t CalleeCounters, |
| 2125 | + uint32_t CalleeCallsites) { |
| 2126 | + // We'll allocate a new ID to imported callsite counters and callsites. We're |
| 2127 | + // using -1 to indicate a counter we delete. Most likely the entry, for |
| 2128 | + // example, will be deleted - we don't want 2 IDs in the same BB, and the |
| 2129 | + // entry would have been cloned in the callsite's old BB. |
| 2130 | + std::vector<int64_t> CalleeCounterMap; |
| 2131 | + std::vector<int64_t> CalleeCallsiteMap; |
| 2132 | + CalleeCounterMap.resize(CalleeCounters, -1); |
| 2133 | + CalleeCallsiteMap.resize(CalleeCallsites, -1); |
| 2134 | + |
| 2135 | + auto RewriteInstrIfNeeded = [&](InstrProfIncrementInst &Ins) -> bool { |
| 2136 | + if (Ins.getNameValue() == &Caller) |
| 2137 | + return false; |
| 2138 | + const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
| 2139 | + if (CalleeCounterMap[OldID] == -1) |
| 2140 | + CalleeCounterMap[OldID] = CtxProf.allocateNextCounterIndex(Caller); |
| 2141 | + const auto NewID = static_cast<uint32_t>(CalleeCounterMap[OldID]); |
| 2142 | + |
| 2143 | + Ins.setNameValue(&Caller); |
| 2144 | + Ins.setIndex(NewID); |
| 2145 | + return true; |
| 2146 | + }; |
| 2147 | + |
| 2148 | + auto RewriteCallsiteInsIfNeeded = [&](InstrProfCallsite &Ins)-> bool { |
| 2149 | + if (Ins.getNameValue() == &Caller) |
| 2150 | + return false; |
| 2151 | + const auto OldID = static_cast<uint32_t>(Ins.getIndex()->getZExtValue()); |
| 2152 | + if (CalleeCallsiteMap[OldID] == -1) |
| 2153 | + CalleeCallsiteMap[OldID] = CtxProf.allocateNextCallsiteIndex(Caller); |
| 2154 | + const auto NewID = static_cast<uint32_t>(CalleeCallsiteMap[OldID]); |
| 2155 | + |
| 2156 | + Ins.setNameValue(&Caller); |
| 2157 | + Ins.setIndex(NewID); |
| 2158 | + return true; |
| 2159 | + }; |
| 2160 | + |
| 2161 | + std::deque<BasicBlock*> Worklist; |
| 2162 | + DenseSet<const BasicBlock*> Seen; |
| 2163 | + // We will traverse the BBs starting from the callsite BB. The callsite BB |
| 2164 | + // will have at least a BB ID - maybe its own, and in any case the one coming |
| 2165 | + // from the cloned function's entry BB. The other BBs we'll start seeing from |
| 2166 | + // there on may or may not have BB IDs. BBs with IDs belonging to our caller |
| 2167 | + // are definitely not coming from the imported function and form a boundary |
| 2168 | + // past which we don't need to traverse anymore. BBs may have no |
| 2169 | + // instrumentation, in which case we'll traverse past them. |
| 2170 | + // An invariant we'll keep is that a BB will have at most 1 BB ID. For |
| 2171 | + // example, the callsite BB will delete the callee BB's instrumentation. This |
| 2172 | + // doesn't result in information loss: the entry BB of the caller will have |
| 2173 | + // the same count as the callsite's BB. |
| 2174 | + // At the end of this traversal, all the callee's instrumentation would be |
| 2175 | + // mapped into the caller's instrumentation index space. Some of the callee's |
| 2176 | + // counters may be deleted (as mentioned, this should result in no loss of |
| 2177 | + // information). |
| 2178 | + Worklist.push_back(StartBB); |
| 2179 | + while (!Worklist.empty()) { |
| 2180 | + auto *BB = Worklist.front(); |
| 2181 | + Worklist.pop_front(); |
| 2182 | + bool Changed = false; |
| 2183 | + auto *BBID = CtxProfAnalysis::getBBInstrumentation(*BB); |
| 2184 | + if (BBID) { |
| 2185 | + Changed |= RewriteInstrIfNeeded(*BBID); |
| 2186 | + // this may be the entryblock from the inlined callee, coming into a BB |
| 2187 | + // that didn't have instrumentation because of MST decisions. Let's make |
| 2188 | + // sure it's placed accordingly. This is a noop elsewhere. |
| 2189 | + BBID->moveBefore(&*BB->getFirstInsertionPt()); |
| 2190 | + } |
| 2191 | + for (auto &I : llvm::make_early_inc_range(*BB)) { |
| 2192 | + if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) { |
| 2193 | + if (Inc != BBID) { |
| 2194 | + Inc->eraseFromParent(); |
| 2195 | + Changed = true; |
| 2196 | + } |
| 2197 | + } else if (auto *CS = dyn_cast<InstrProfCallsite>(&I)) { |
| 2198 | + Changed |= RewriteCallsiteInsIfNeeded(*CS); |
| 2199 | + } |
| 2200 | + } |
| 2201 | + if (!BBID || Changed) |
| 2202 | + for (auto *Succ : successors(BB)) |
| 2203 | + if (Seen.insert(Succ).second) |
| 2204 | + Worklist.push_back(Succ); |
| 2205 | + } |
| 2206 | + return {std::move(CalleeCounterMap), std::move(CalleeCallsiteMap)}; |
| 2207 | +} |
| 2208 | + |
| 2209 | +llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, |
| 2210 | + CtxProfAnalysis::Result &CtxProf, |
| 2211 | + bool MergeAttributes, |
| 2212 | + AAResults *CalleeAAR, |
| 2213 | + bool InsertLifetime, |
| 2214 | + Function *ForwardVarArgsTo) { |
| 2215 | + auto &Caller = *CB.getCaller(); |
| 2216 | + auto &Callee = *CB.getCalledFunction(); |
| 2217 | + auto *StartBB = CB.getParent(); |
| 2218 | + |
| 2219 | + const auto CalleeGUID = AssignGUIDPass::getGUID(Callee); |
| 2220 | + auto *CallsiteIDIns = CtxProfAnalysis::getCallsiteInstrumentation(CB); |
| 2221 | + const auto CallsiteID = |
| 2222 | + static_cast<uint32_t>(CallsiteIDIns->getIndex()->getZExtValue()); |
| 2223 | + |
| 2224 | + const auto CalleeCounters = CtxProf.getNrCounters(Callee); |
| 2225 | + const auto CalleeCallsites = CtxProf.getNrCallsites(Callee); |
| 2226 | + |
| 2227 | + auto Ret = InlineFunction(CB, IFI, MergeAttributes, CalleeAAR, InsertLifetime, |
| 2228 | + ForwardVarArgsTo); |
| 2229 | + if (!Ret.isSuccess()) |
| 2230 | + return Ret; |
| 2231 | + |
| 2232 | + // We don't have that callsite anymore. |
| 2233 | + CallsiteIDIns->eraseFromParent(); |
| 2234 | + |
| 2235 | + // Assinging Maps and then capturing references into it in the lambda because |
| 2236 | + // captured structured bindings are a C++20 extension. We do also need a |
| 2237 | + // capture here, though. |
| 2238 | + const auto Maps = |
| 2239 | + remapIndices(Caller, StartBB, CtxProf, CalleeCounters, CalleeCallsites); |
| 2240 | + const auto &[CalleeCounterMap, _] = Maps; |
| 2241 | + const uint32_t NewCountersSize = CtxProf.getNrCounters(Caller); |
| 2242 | + |
| 2243 | + auto Updater = [&](PGOCtxProfContext &Ctx) { |
| 2244 | + assert(Ctx.guid() == AssignGUIDPass::getGUID(Caller)); |
| 2245 | + const auto &[CalleeCounterMap, CalleeCallsiteMap] = Maps; |
| 2246 | + assert( |
| 2247 | + (Ctx.counters().size() + |
| 2248 | + llvm::count_if(CalleeCounterMap, [](auto V) { return V != -1; }) == |
| 2249 | + NewCountersSize) && |
| 2250 | + "The caller's counters size should have grown by the number of new " |
| 2251 | + "distinct counters inherited from the inlined callee."); |
| 2252 | + Ctx.resizeCounters(NewCountersSize); |
| 2253 | + // If the callsite wasn't exercised in this context, the value of the |
| 2254 | + // counters coming from it is 0 and so we're done. |
| 2255 | + auto CSIt = Ctx.callsites().find(CallsiteID); |
| 2256 | + if (CSIt == Ctx.callsites().end()) |
| 2257 | + return; |
| 2258 | + auto CalleeCtxIt = CSIt->second.find(CalleeGUID); |
| 2259 | + // The callsite was exercised, but not with this callee (so presumably this |
| 2260 | + // is an indirect callsite). Again we're done. |
| 2261 | + if (CalleeCtxIt == CSIt->second.end()) |
| 2262 | + return; |
| 2263 | + auto &CalleeCtx = CalleeCtxIt->second; |
| 2264 | + assert(CalleeCtx.guid() == CalleeGUID); |
| 2265 | + |
| 2266 | + for (auto I = 0U; I < CalleeCtx.counters().size(); ++I) { |
| 2267 | + const int64_t NewIndex = CalleeCounterMap[I]; |
| 2268 | + if (NewIndex >= 0) |
| 2269 | + Ctx.counters()[NewIndex] = CalleeCtx.counters()[I]; |
| 2270 | + } |
| 2271 | + for (auto &[I, OtherSet] : CalleeCtx.callsites()) { |
| 2272 | + const int64_t NewCSIdx = CalleeCallsiteMap[I]; |
| 2273 | + if (NewCSIdx >= 0) |
| 2274 | + Ctx.ingestAllContexts(NewCSIdx, std::move(OtherSet)); |
| 2275 | + } |
| 2276 | + auto Deleted = Ctx.callsites().erase(CallsiteID); |
| 2277 | + assert(Deleted); |
| 2278 | + (void)Deleted; |
| 2279 | + }; |
| 2280 | + CtxProf.update(Updater, &Caller); |
| 2281 | + return Ret; |
| 2282 | +} |
| 2283 | + |
2119 | 2284 | /// This function inlines the called function into the basic block of the
|
2120 | 2285 | /// caller. This returns false if it is not possible to inline this call.
|
2121 | 2286 | /// The program is still in a well defined state if this occurs though.
|
|
0 commit comments