Commit 94737fb

Reapply "[MemProf] Reduce cloning overhead by sharing nodes when possible" (llvm#102932) with fixes
This reverts commit 11aa31f, restoring commit 055e431, with added fixes for linker unsats. In some cases, multiple calls to different targets may end up with the same debug information, and therefore the same callsite id, so we end up sharing a node between these calls. We don't know which call matches which callee until all nodes have been matched with calls, at which point any non-matching calls should be removed from the node. The fix extends the handling in handleCallsitesWithMultipleTargets to do this, and adds tests for various permutations of this situation.
1 parent 66927fb commit 94737fb
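
The core idea of the reapplied change, sketched below with hypothetical stand-in types (Call, Node, buildNodes) rather than the real CallsiteContextGraph/CallInfo classes: callsites that live in the same function and carry the same (possibly pruned) stack ids are folded into a single context node, with the first call becoming the node's primary Call and the rest recorded in MatchingCalls so they can be updated in lockstep during cloning.

// A minimal sketch of the node-sharing scheme, using simplified stand-in
// types (not the real CallsiteContextGraph classes).
#include <cstddef>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct Call {
  std::string Function;            // enclosing function name
  std::vector<uint64_t> StackIds;  // (possibly pruned) callsite stack ids
};

struct Node {
  Call Primary;                     // the call the node was created for
  std::vector<Call> MatchingCalls;  // calls sharing function + stack ids
};

// Build one node per (function, stack ids) pair instead of one per call.
std::vector<Node> buildNodes(const std::vector<Call> &Calls) {
  std::map<std::pair<std::string, std::vector<uint64_t>>, std::size_t> Index;
  std::vector<Node> Nodes;
  for (const Call &C : Calls) {
    auto Key = std::make_pair(C.Function, C.StackIds);
    auto [It, Inserted] = Index.try_emplace(Key, Nodes.size());
    if (Inserted)
      Nodes.push_back({C, {}});  // first such call becomes the primary
    else
      Nodes[It->second].MatchingCalls.push_back(C);  // later calls share it
  }
  return Nodes;
}

Grouping by (function, stack ids) is what keeps the node count, and therefore the cloning overhead, down; the linker-unsat fix in this commit handles the case where calls to different targets land in the same group and the non-matching ones must later be removed from the node in handleCallsitesWithMultipleTargets.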

1 file changed (+109, -15)

llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Lines changed: 109 additions & 15 deletions
@@ -242,9 +242,16 @@ class CallsiteContextGraph {
     // recursion.
     bool Recursive = false;
 
-    // The corresponding allocation or interior call.
+    // The corresponding allocation or interior call. This is the primary call
+    // for which we have created this node.
     CallInfo Call;
 
+    // List of other calls that can be treated the same as the primary call
+    // through cloning. I.e. located in the same function and have the same
+    // (possibly pruned) stack ids. They will be updated the same way as the
+    // primary call when assigning to function clones.
+    std::vector<CallInfo> MatchingCalls;
+
     // For alloc nodes this is a unique id assigned when constructed, and for
     // callsite stack nodes it is the original stack id when the node is
     // constructed from the memprof MIB metadata on the alloc nodes. Note that
@@ -457,6 +464,9 @@ class CallsiteContextGraph {
   /// iteration.
   MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
 
+  /// Records the function each call is located in.
+  DenseMap<CallInfo, const FuncTy *> CallToFunc;
+
   /// Map from callsite node to the enclosing caller function.
   std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
 
@@ -474,7 +484,8 @@ class CallsiteContextGraph {
   /// StackIdToMatchingCalls map.
   void assignStackNodesPostOrder(
       ContextNode *Node, DenseSet<const ContextNode *> &Visited,
-      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
+      DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls,
+      DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
 
   /// Duplicates the given set of context ids, updating the provided
   /// map from each original id with the newly generated context ids,
@@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
-    assignStackNodesPostOrder(ContextNode *Node,
-                              DenseSet<const ContextNode *> &Visited,
-                              DenseMap<uint64_t, std::vector<CallContextInfo>>
-                                  &StackIdToMatchingCalls) {
+    assignStackNodesPostOrder(
+        ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+        DenseMap<uint64_t, std::vector<CallContextInfo>>
+            &StackIdToMatchingCalls,
+        DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
   auto Inserted = Visited.insert(Node);
   if (!Inserted.second)
     return;
@@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     // Skip any that have been removed during the recursion.
     if (!Edge)
       continue;
-    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
+    assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls,
+                              CallToMatchingCall);
   }
 
   // If this node's stack id is in the map, update the graph to contain new
@@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
     // Skip any for which we didn't assign any ids, these don't get a node in
     // the graph.
-    if (SavedContextIds.empty())
+    if (SavedContextIds.empty()) {
+      // If this call has a matching call (located in the same function and
+      // having the same stack ids), simply add it to the context node created
+      // for its matching call earlier. These can be treated the same through
+      // cloning and get updated at the same time.
+      if (!CallToMatchingCall.contains(Call))
+        continue;
+      auto MatchingCall = CallToMatchingCall[Call];
+      assert(NonAllocationCallToContextNodeMap.contains(MatchingCall));
+      NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls.push_back(
+          Call);
       continue;
+    }
 
     assert(LastId == Ids.back());
 
@@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // there is more than one call with the same stack ids. Their (possibly newly
   // duplicated) context ids are saved in the StackIdToMatchingCalls map.
   DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
+  // Save a map from each call to any that are found to match it. I.e. located
+  // in the same function and have the same (possibly pruned) stack ids. We use
+  // this to avoid creating extra graph nodes as they can be treated the same.
+  DenseMap<CallInfo, CallInfo> CallToMatchingCall;
   for (auto &It : StackIdToMatchingCalls) {
     auto &Calls = It.getSecond();
     // Skip single calls with a single stack id. These don't need a new node.
@@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
     DenseSet<uint32_t> LastNodeContextIds = LastNode->getContextIds();
     assert(!LastNodeContextIds.empty());
 
+    // Map from function to the first call from the below list (with matching
+    // stack ids) found in that function. Note that calls from different
+    // functions can have the same stack ids because this is the list of stack
+    // ids that had (possibly pruned) nodes after building the graph from the
+    // allocation MIBs.
+    DenseMap<const FuncTy *, CallInfo> FuncToCallMap;
+
     for (unsigned I = 0; I < Calls.size(); I++) {
       auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
       assert(SavedContextIds.empty());
@@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         continue;
       }
 
+      const FuncTy *CallFunc = CallToFunc[Call];
+
+      // If the prior call had the same stack ids this map would not be empty.
+      // Check if we already have a call that "matches" because it is located
+      // in the same function.
+      if (FuncToCallMap.contains(CallFunc)) {
+        // Record the matching call found for this call, and skip it. We
+        // will subsequently combine it into the same node.
+        CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
+        continue;
+      }
+
      // Check if the next set of stack ids is the same (since the Calls vector
      // of tuples is sorted by the stack ids we can just look at the next one).
      bool DuplicateContextIds = false;
@@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
         set_subtract(LastNodeContextIds, StackSequenceContextIds);
         if (LastNodeContextIds.empty())
           break;
-      }
+        // No longer possibly in a sequence of calls with duplicate stack ids,
+        // clear the map.
+        FuncToCallMap.clear();
+      } else
+        // Record the call with its function, so we can locate it the next time
+        // we find a call from this function when processing the calls with the
+        // same stack ids.
+        FuncToCallMap[CallFunc] = Call;
     }
   }
 
@@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
   // associated context ids over to the new nodes.
   DenseSet<const ContextNode *> Visited;
   for (auto &Entry : AllocationCallToContextNodeMap)
-    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
+    assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls,
+                              CallToMatchingCall);
   if (VerifyCCG)
     check();
 }
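
A simplified, standalone rendering of the FuncToCallMap / CallToMatchingCall bookkeeping added to updateStackNodes above. The names are reused for readability, but the types are stand-ins (plain std::map and std::string call identities instead of DenseMap, CallInfo, and FuncTy), and the real code runs this per group of calls sharing a last stack id, interleaved with context-id assignment.

// Sketch: record which calls can share a node with an earlier call.
#include <cstdint>
#include <map>
#include <string>
#include <vector>

struct CallRec {
  std::string Name;                // stand-in for the call itself
  std::string Func;                // enclosing function
  std::vector<uint64_t> StackIds;  // Calls is sorted by these ids
};

void recordMatchingCalls(const std::vector<CallRec> &Calls,
                         std::map<std::string, std::string> &CallToMatchingCall) {
  // First call seen in each function for the current run of identical ids.
  std::map<std::string, std::string> FuncToCallMap;
  for (unsigned I = 0; I < Calls.size(); ++I) {
    const CallRec &C = Calls[I];
    auto It = FuncToCallMap.find(C.Func);
    if (It != FuncToCallMap.end()) {
      // Same function and same stack ids as an earlier call: remember the
      // match and skip creating a separate node for this call.
      CallToMatchingCall[C.Name] = It->second;
      continue;
    }
    bool NextHasSameIds =
        I + 1 < Calls.size() && Calls[I + 1].StackIds == C.StackIds;
    if (NextHasSameIds)
      FuncToCallMap[C.Func] = C.Name;  // a later call may match this one
    else
      FuncToCallMap.clear();  // the run of duplicate stack ids has ended
  }
}

Because the Calls vector is sorted by stack ids, a run of identical ids is contiguous, so the map only needs to be consulted within that run and cleared once the next entry's ids differ.
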
@@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
           continue;
         if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
           CallsWithMetadata.push_back(&I);
+          CallToFunc[&I] = &F;
           auto *AllocNode = addAllocNode(&I, &F);
           auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
           assert(CallsiteMD);
@@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
           I.setMetadata(LLVMContext::MD_callsite, nullptr);
         }
         // For callsite metadata, add to list for this function for later use.
-        else if (I.getMetadata(LLVMContext::MD_callsite))
+        else if (I.getMetadata(LLVMContext::MD_callsite)) {
           CallsWithMetadata.push_back(&I);
+          CallToFunc[&I] = &F;
+        }
       }
     }
     if (!CallsWithMetadata.empty())
@@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
       // correlate properly in applyImport in the backends.
       if (AN.MIBs.empty())
         continue;
-      CallsWithMetadata.push_back({&AN});
-      auto *AllocNode = addAllocNode({&AN}, FS);
+      IndexCall AllocCall(&AN);
+      CallsWithMetadata.push_back(AllocCall);
+      CallToFunc[AllocCall] = FS;
+      auto *AllocNode = addAllocNode(AllocCall, FS);
       // Pass an empty CallStack to the CallsiteContext (second)
       // parameter, since for ThinLTO we already collapsed out the inlined
       // stack ids on the allocation call during ModuleSummaryAnalysis.
@@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
     }
     // For callsite metadata, add to list for this function for later use.
     if (!FS->callsites().empty())
-      for (auto &SN : FS->mutableCallsites())
-        CallsWithMetadata.push_back({&SN});
+      for (auto &SN : FS->mutableCallsites()) {
+        IndexCall StackNodeCall(&SN);
+        CallsWithMetadata.push_back(StackNodeCall);
+        CallToFunc[StackNodeCall] = FS;
+      }
 
     if (!CallsWithMetadata.empty())
       FuncToCallsWithMetadata[FS] = CallsWithMetadata;
@@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
   if (Recursive)
     OS << " (recursive)";
   OS << "\n";
+  if (!MatchingCalls.empty()) {
+    OS << "\tMatchingCalls:\n";
+    for (auto &MatchingCall : MatchingCalls) {
+      OS << "\t";
+      MatchingCall.print(OS);
+      OS << "\n";
+    }
+  }
   OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
   OS << "\tContextIds:";
   // Make a copy of the computed context ids that we can sort for stability.
@@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
       std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
   ContextNode *Clone = NodeOwner.back().get();
   Node->addClone(Clone);
+  Clone->MatchingCalls = Node->MatchingCalls;
   assert(NodeToCallingFunc.count(Node));
   NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
   moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true,
@@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
       if (CallMap.count(Call))
         CallClone = CallMap[Call];
       CallsiteClone->setCall(CallClone);
+      // Need to do the same for all matching calls.
+      for (auto &MatchingCall : Node->MatchingCalls) {
+        CallInfo CallClone(MatchingCall);
+        if (CallMap.count(MatchingCall))
+          CallClone = CallMap[MatchingCall];
+        // Updates the call in the list.
+        MatchingCall = CallClone;
+      }
     };
 
     // Keep track of the clones of callsite Node that need to be assigned to
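
The hunk above extends the callsite-clone assignment code so that matching calls are remapped through CallMap exactly like the node's primary call. A minimal sketch of that invariant, assuming simplified stand-in types (NodeClone, std::string call identities, and a plain std::map in place of the real CallMap):

// Sketch: remap the primary call and every matching call to their
// counterparts in the chosen function clone.
#include <map>
#include <string>
#include <vector>

struct NodeClone {
  std::string Call;                        // primary call in this clone
  std::vector<std::string> MatchingCalls;  // calls updated in lockstep
};

void remapClone(NodeClone &Clone,
                const std::map<std::string, std::string> &CallMap) {
  auto Remap = [&](std::string &C) {
    auto It = CallMap.find(C);
    if (It != CallMap.end())
      C = It->second;  // swap in the call's counterpart from the clone
    // otherwise keep the original call, mirroring the CallMap.count checks
  };
  Remap(Clone.Call);
  for (std::string &MC : Clone.MatchingCalls)
    Remap(MC);
}
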
@@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
             CallInfo NewCall(CallMap[OrigCall]);
             assert(NewCall);
             NewClone->setCall(NewCall);
+            // Need to do the same for all matching calls.
+            for (auto &MatchingCall : NewClone->MatchingCalls) {
+              CallInfo OrigMatchingCall(MatchingCall);
+              OrigMatchingCall.setCloneNo(0);
+              assert(CallMap.count(OrigMatchingCall));
+              CallInfo NewCall(CallMap[OrigMatchingCall]);
+              assert(NewCall);
+              // Updates the call in the list.
+              MatchingCall = NewCall;
+            }
           }
         }
         // Fall through to handling below to perform the recording of the
@@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
 
     if (Node->IsAllocation) {
       updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
+      assert(Node->MatchingCalls.empty());
       return;
     }
 
@@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
 
     auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
     updateCall(Node->Call, CalleeFunc);
+    // Update all the matching calls as well.
+    for (auto &Call : Node->MatchingCalls)
+      updateCall(Call, CalleeFunc);
   };
 
   // Performs DFS traversal starting from allocation nodes to update calls to
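
Finally, the updateCall hunk above applies the chosen callee function clone to every call that shares the node, not just the primary one. A minimal sketch under the same simplification, with a hypothetical Callsite type and an UpdateCall callback standing in for the real updateCall:

// Sketch: rewrite the callee for the primary call and all matching calls.
#include <functional>
#include <string>
#include <vector>

struct Callsite {
  std::string Call;                        // primary call
  std::vector<std::string> MatchingCalls;  // calls sharing this node
};

void updateAllCalls(
    Callsite &Node, const std::string &CalleeClone,
    const std::function<void(const std::string &, const std::string &)>
        &UpdateCall) {
  UpdateCall(Node.Call, CalleeClone);  // rewrite the primary call
  for (const std::string &C : Node.MatchingCalls)
    UpdateCall(C, CalleeClone);        // and every matching call the same way
}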
