@@ -242,9 +242,16 @@ class CallsiteContextGraph {
242
242
// recursion.
243
243
bool Recursive = false ;
244
244
245
- // The corresponding allocation or interior call.
245
+ // The corresponding allocation or interior call. This is the primary call
246
+ // for which we have created this node.
246
247
CallInfo Call;
247
248
249
+ // List of other calls that can be treated the same as the primary call
250
+ // through cloning. I.e. located in the same function and have the same
251
+ // (possibly pruned) stack ids. They will be updated the same way as the
252
+ // primary call when assigning to function clones.
253
+ std::vector<CallInfo> MatchingCalls;
254
+
248
255
// For alloc nodes this is a unique id assigned when constructed, and for
249
256
// callsite stack nodes it is the original stack id when the node is
250
257
// constructed from the memprof MIB metadata on the alloc nodes. Note that
@@ -457,6 +464,9 @@ class CallsiteContextGraph {
457
464
/// iteration.
458
465
MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
459
466
467
+ /// Records the function each call is located in.
468
+ DenseMap<CallInfo, const FuncTy *> CallToFunc;
469
+
460
470
/// Map from callsite node to the enclosing caller function.
461
471
std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
462
472
@@ -474,7 +484,8 @@ class CallsiteContextGraph {
474
484
/// StackIdToMatchingCalls map.
475
485
void assignStackNodesPostOrder (
476
486
ContextNode *Node, DenseSet<const ContextNode *> &Visited,
477
- DenseMap<uint64_t , std::vector<CallContextInfo>> &StackIdToMatchingCalls);
487
+ DenseMap<uint64_t , std::vector<CallContextInfo>> &StackIdToMatchingCalls,
488
+ DenseMap<CallInfo, CallInfo> &CallToMatchingCall);
478
489
479
490
/// Duplicates the given set of context ids, updating the provided
480
491
/// map from each original id with the newly generated context ids,
@@ -1230,10 +1241,11 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
1230
1241
1231
1242
template <typename DerivedCCG, typename FuncTy, typename CallTy>
1232
1243
void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1233
- assignStackNodesPostOrder (ContextNode *Node,
1234
- DenseSet<const ContextNode *> &Visited,
1235
- DenseMap<uint64_t , std::vector<CallContextInfo>>
1236
- &StackIdToMatchingCalls) {
1244
+ assignStackNodesPostOrder (
1245
+ ContextNode *Node, DenseSet<const ContextNode *> &Visited,
1246
+ DenseMap<uint64_t , std::vector<CallContextInfo>>
1247
+ &StackIdToMatchingCalls,
1248
+ DenseMap<CallInfo, CallInfo> &CallToMatchingCall) {
1237
1249
auto Inserted = Visited.insert (Node);
1238
1250
if (!Inserted.second )
1239
1251
return ;
@@ -1246,7 +1258,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1246
1258
// Skip any that have been removed during the recursion.
1247
1259
if (!Edge)
1248
1260
continue ;
1249
- assignStackNodesPostOrder (Edge->Caller , Visited, StackIdToMatchingCalls);
1261
+ assignStackNodesPostOrder (Edge->Caller , Visited, StackIdToMatchingCalls,
1262
+ CallToMatchingCall);
1250
1263
}
1251
1264
1252
1265
// If this node's stack id is in the map, update the graph to contain new
@@ -1289,8 +1302,19 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
1289
1302
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1290
1303
// Skip any for which we didn't assign any ids, these don't get a node in
1291
1304
// the graph.
1292
- if (SavedContextIds.empty ())
1305
+ if (SavedContextIds.empty ()) {
1306
+ // If this call has a matching call (located in the same function and
1307
+ // having the same stack ids), simply add it to the context node created
1308
+ // for its matching call earlier. These can be treated the same through
1309
+ // cloning and get updated at the same time.
1310
+ if (!CallToMatchingCall.contains (Call))
1311
+ continue ;
1312
+ auto MatchingCall = CallToMatchingCall[Call];
1313
+ assert (NonAllocationCallToContextNodeMap.contains (MatchingCall));
1314
+ NonAllocationCallToContextNodeMap[MatchingCall]->MatchingCalls .push_back (
1315
+ Call);
1293
1316
continue ;
1317
+ }
1294
1318
1295
1319
assert (LastId == Ids.back ());
1296
1320
@@ -1422,6 +1446,10 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1422
1446
// there is more than one call with the same stack ids. Their (possibly newly
1423
1447
// duplicated) context ids are saved in the StackIdToMatchingCalls map.
1424
1448
DenseMap<uint32_t , DenseSet<uint32_t >> OldToNewContextIds;
1449
+ // Save a map from each call to any that are found to match it. I.e. located
1450
+ // in the same function and have the same (possibly pruned) stack ids. We use
1451
+ // this to avoid creating extra graph nodes as they can be treated the same.
1452
+ DenseMap<CallInfo, CallInfo> CallToMatchingCall;
1425
1453
for (auto &It : StackIdToMatchingCalls) {
1426
1454
auto &Calls = It.getSecond ();
1427
1455
// Skip single calls with a single stack id. These don't need a new node.
@@ -1460,6 +1488,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1460
1488
DenseSet<uint32_t > LastNodeContextIds = LastNode->getContextIds ();
1461
1489
assert (!LastNodeContextIds.empty ());
1462
1490
1491
+ // Map from function to the first call from the below list (with matching
1492
+ // stack ids) found in that function. Note that calls from different
1493
+ // functions can have the same stack ids because this is the list of stack
1494
+ // ids that had (possibly pruned) nodes after building the graph from the
1495
+ // allocation MIBs.
1496
+ DenseMap<const FuncTy *, CallInfo> FuncToCallMap;
1497
+
1463
1498
for (unsigned I = 0 ; I < Calls.size (); I++) {
1464
1499
auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
1465
1500
assert (SavedContextIds.empty ());
@@ -1533,6 +1568,18 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1533
1568
continue ;
1534
1569
}
1535
1570
1571
+ const FuncTy *CallFunc = CallToFunc[Call];
1572
+
1573
+ // If the prior call had the same stack ids this map would not be empty.
1574
+ // Check if we already have a call that "matches" because it is located
1575
+ // in the same function.
1576
+ if (FuncToCallMap.contains (CallFunc)) {
1577
+ // Record the matching call found for this call, and skip it. We
1578
+ // will subsequently combine it into the same node.
1579
+ CallToMatchingCall[Call] = FuncToCallMap[CallFunc];
1580
+ continue ;
1581
+ }
1582
+
1536
1583
// Check if the next set of stack ids is the same (since the Calls vector
1537
1584
// of tuples is sorted by the stack ids we can just look at the next one).
1538
1585
bool DuplicateContextIds = false ;
@@ -1562,7 +1609,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1562
1609
set_subtract (LastNodeContextIds, StackSequenceContextIds);
1563
1610
if (LastNodeContextIds.empty ())
1564
1611
break ;
1565
- }
1612
+ // No longer possibly in a sequence of calls with duplicate stack ids,
1613
+ // clear the map.
1614
+ FuncToCallMap.clear ();
1615
+ } else
1616
+ // Record the call with its function, so we can locate it the next time
1617
+ // we find a call from this function when processing the calls with the
1618
+ // same stack ids.
1619
+ FuncToCallMap[CallFunc] = Call;
1566
1620
}
1567
1621
}
1568
1622
@@ -1579,7 +1633,8 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
1579
1633
// associated context ids over to the new nodes.
1580
1634
DenseSet<const ContextNode *> Visited;
1581
1635
for (auto &Entry : AllocationCallToContextNodeMap)
1582
- assignStackNodesPostOrder (Entry.second , Visited, StackIdToMatchingCalls);
1636
+ assignStackNodesPostOrder (Entry.second , Visited, StackIdToMatchingCalls,
1637
+ CallToMatchingCall);
1583
1638
if (VerifyCCG)
1584
1639
check ();
1585
1640
}
@@ -1679,6 +1734,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1679
1734
continue ;
1680
1735
if (auto *MemProfMD = I.getMetadata (LLVMContext::MD_memprof)) {
1681
1736
CallsWithMetadata.push_back (&I);
1737
+ CallToFunc[&I] = &F;
1682
1738
auto *AllocNode = addAllocNode (&I, &F);
1683
1739
auto *CallsiteMD = I.getMetadata (LLVMContext::MD_callsite);
1684
1740
assert (CallsiteMD);
@@ -1700,8 +1756,10 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
1700
1756
I.setMetadata (LLVMContext::MD_callsite, nullptr );
1701
1757
}
1702
1758
// For callsite metadata, add to list for this function for later use.
1703
- else if (I.getMetadata (LLVMContext::MD_callsite))
1759
+ else if (I.getMetadata (LLVMContext::MD_callsite)) {
1704
1760
CallsWithMetadata.push_back (&I);
1761
+ CallToFunc[&I] = &F;
1762
+ }
1705
1763
}
1706
1764
}
1707
1765
if (!CallsWithMetadata.empty ())
@@ -1756,8 +1814,10 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1756
1814
// correlate properly in applyImport in the backends.
1757
1815
if (AN.MIBs .empty ())
1758
1816
continue ;
1759
- CallsWithMetadata.push_back ({&AN});
1760
- auto *AllocNode = addAllocNode ({&AN}, FS);
1817
+ IndexCall AllocCall (&AN);
1818
+ CallsWithMetadata.push_back (AllocCall);
1819
+ CallToFunc[AllocCall] = FS;
1820
+ auto *AllocNode = addAllocNode (AllocCall, FS);
1761
1821
// Pass an empty CallStack to the CallsiteContext (second)
1762
1822
// parameter, since for ThinLTO we already collapsed out the inlined
1763
1823
// stack ids on the allocation call during ModuleSummaryAnalysis.
@@ -1788,8 +1848,11 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
1788
1848
}
1789
1849
// For callsite metadata, add to list for this function for later use.
1790
1850
if (!FS->callsites ().empty ())
1791
- for (auto &SN : FS->mutableCallsites ())
1792
- CallsWithMetadata.push_back ({&SN});
1851
+ for (auto &SN : FS->mutableCallsites ()) {
1852
+ IndexCall StackNodeCall (&SN);
1853
+ CallsWithMetadata.push_back (StackNodeCall);
1854
+ CallToFunc[StackNodeCall] = FS;
1855
+ }
1793
1856
1794
1857
if (!CallsWithMetadata.empty ())
1795
1858
FuncToCallsWithMetadata[FS] = CallsWithMetadata;
@@ -2225,6 +2288,14 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
2225
2288
if (Recursive)
2226
2289
OS << " (recursive)" ;
2227
2290
OS << " \n " ;
2291
+ if (!MatchingCalls.empty ()) {
2292
+ OS << " \t MatchingCalls:\n " ;
2293
+ for (auto &MatchingCall : MatchingCalls) {
2294
+ OS << " \t " ;
2295
+ MatchingCall.print (OS);
2296
+ OS << " \n " ;
2297
+ }
2298
+ }
2228
2299
OS << " \t AllocTypes: " << getAllocTypeString (AllocTypes) << " \n " ;
2229
2300
OS << " \t ContextIds:" ;
2230
2301
// Make a copy of the computed context ids that we can sort for stability.
@@ -2478,6 +2549,7 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
2478
2549
std::make_unique<ContextNode>(Node->IsAllocation , Node->Call ));
2479
2550
ContextNode *Clone = NodeOwner.back ().get ();
2480
2551
Node->addClone (Clone);
2552
+ Clone->MatchingCalls = Node->MatchingCalls ;
2481
2553
assert (NodeToCallingFunc.count (Node));
2482
2554
NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
2483
2555
moveEdgeToExistingCalleeClone (Edge, Clone, CallerEdgeI, /* NewClone=*/ true ,
@@ -3021,6 +3093,14 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3021
3093
if (CallMap.count (Call))
3022
3094
CallClone = CallMap[Call];
3023
3095
CallsiteClone->setCall (CallClone);
3096
+ // Need to do the same for all matching calls.
3097
+ for (auto &MatchingCall : Node->MatchingCalls ) {
3098
+ CallInfo CallClone (MatchingCall);
3099
+ if (CallMap.count (MatchingCall))
3100
+ CallClone = CallMap[MatchingCall];
3101
+ // Updates the call in the list.
3102
+ MatchingCall = CallClone;
3103
+ }
3024
3104
};
3025
3105
3026
3106
// Keep track of the clones of callsite Node that need to be assigned to
@@ -3187,6 +3267,16 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3187
3267
CallInfo NewCall (CallMap[OrigCall]);
3188
3268
assert (NewCall);
3189
3269
NewClone->setCall (NewCall);
3270
+ // Need to do the same for all matching calls.
3271
+ for (auto &MatchingCall : NewClone->MatchingCalls ) {
3272
+ CallInfo OrigMatchingCall (MatchingCall);
3273
+ OrigMatchingCall.setCloneNo (0 );
3274
+ assert (CallMap.count (OrigMatchingCall));
3275
+ CallInfo NewCall (CallMap[OrigMatchingCall]);
3276
+ assert (NewCall);
3277
+ // Updates the call in the list.
3278
+ MatchingCall = NewCall;
3279
+ }
3190
3280
}
3191
3281
}
3192
3282
// Fall through to handling below to perform the recording of the
@@ -3373,6 +3463,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3373
3463
3374
3464
if (Node->IsAllocation ) {
3375
3465
updateAllocationCall (Node->Call , allocTypeToUse (Node->AllocTypes ));
3466
+ assert (Node->MatchingCalls .empty ());
3376
3467
return ;
3377
3468
}
3378
3469
@@ -3381,6 +3472,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
3381
3472
3382
3473
auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
3383
3474
updateCall (Node->Call , CalleeFunc);
3475
+ // Update all the matching calls as well.
3476
+ for (auto &Call : Node->MatchingCalls )
3477
+ updateCall (Call, CalleeFunc);
3384
3478
};
3385
3479
3386
3480
// Performs DFS traversal starting from allocation nodes to update calls to
0 commit comments