@@ -163,16 +163,10 @@ void CallStackTrie::addCallStack(
       continue;
     }
     // Update existing caller node if it exists.
-    CallStackTrieNode *Prev = nullptr;
     auto [Next, Inserted] = Curr->Callers.try_emplace(StackId);
     if (!Inserted) {
-      Prev = Curr;
       Curr = Next->second;
       Curr->addAllocType(AllocType);
-      // If this node has an ambiguous alloc type, its callee is not the deepest
-      // point where we have an ambigous allocation type.
-      if (!hasSingleAllocType(Curr->AllocTypes))
-        Prev->DeepestAmbiguousAllocType = false;
       continue;
     }
     // Otherwise add a new caller node.
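
// --- Illustrative sketch (not part of the commit): a minimal standalone
// example of the insert-or-update pattern that try_emplace enables above,
// using hypothetical simplified stand-ins for the trie types. One map
// traversal both finds an existing caller node and reserves the slot for a
// new one, so no separate find-then-insert is needed.
#include <cstdint>
#include <map>

enum AllocType : uint8_t { NotCold = 1, Cold = 2 };

struct TrieNode {
  uint8_t AllocTypes = 0;                 // Bitmask of AllocType values.
  std::map<uint64_t, TrieNode *> Callers; // Keyed by caller stack id.
};

// Walk one step up the trie: reuse the caller node if it exists, otherwise
// create it. (Ownership/cleanup elided for brevity.)
TrieNode *addCaller(TrieNode *Curr, uint64_t StackId, uint8_t Type) {
  auto [It, Inserted] = Curr->Callers.try_emplace(StackId);
  if (!Inserted) {
    It->second->AllocTypes |= Type; // Existing node: merge the alloc type.
    return It->second;
  }
  It->second = new TrieNode();      // Newly inserted slot: create the node.
  It->second->AllocTypes = Type;
  return It->second;
}
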
@@ -248,41 +242,114 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
     convertHotToNotCold(Caller.second);
 }
 
+// Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
+// on options that enable filtering out some NotCold contexts.
+static void SaveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
+                                    std::vector<Metadata *> &SavedMIBNodes,
+                                    unsigned CallerContextLength) {
+  // In the simplest case, with pruning disabled, keep all the new MIB nodes.
+  if (MemProfKeepAllNotColdContexts)
+    append_range(SavedMIBNodes, NewMIBNodes);
+
+  auto EmitMessageForRemovedContexts = [](const MDNode *MIBMD, StringRef Tag,
+                                          StringRef Extra) {
+    assert(MIBMD->getNumOperands() > 2);
+    for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
+      MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(I));
+      assert(ContextSizePair->getNumOperands() == 2);
+      uint64_t FullStackId =
+          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
+              ->getZExtValue();
+      uint64_t TS =
+          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
+              ->getZExtValue();
+      errs() << "MemProf hinting: Total size for " << Tag
+             << " non-cold full allocation context hash " << FullStackId
+             << Extra << ": " << TS << "\n";
+    }
+  };
+
+  // Prune unneeded NotCold contexts, taking advantage of the fact
+  // that we later will only clone Cold contexts, as NotCold is the allocation
+  // default. We only need to keep as metadata the NotCold contexts that
+  // overlap the longest with Cold allocations, so that we know how deeply we
+  // need to clone. For example, assume we add the following contexts to the
+  // trie:
+  //      1 3 (notcold)
+  //      1 2 4 (cold)
+  //      1 2 5 (notcold)
+  //      1 2 6 (notcold)
+  // the trie looks like:
+  //         1
+  //        / \
+  //       2   3
+  //      /|\
+  //     4 5 6
+  //
+  // It is sufficient to prune all but one not-cold contexts (either 1,2,5 or
+  // 1,2,6, we arbitrarily keep the first one we encounter which will be
+  // 1,2,5).
+  //
+  // To do this pruning, we first check if there were any not-cold
+  // contexts kept for a deeper caller, which will have a context length larger
+  // than the CallerContextLength being handled here (i.e. kept by a deeper
+  // recursion step). If so, none of the not-cold MIB nodes added for the
+  // immediate callers need to be kept. If not, we keep the first (created
+  // for the immediate caller) not-cold MIB node.
+  bool LongerNotColdContextKept = false;
+  for (auto *MIB : NewMIBNodes) {
+    auto MIBMD = cast<MDNode>(MIB);
+    if (getMIBAllocType(MIBMD) == AllocationType::Cold)
+      continue;
+    MDNode *StackMD = getMIBStackNode(MIBMD);
+    assert(StackMD);
+    if (StackMD->getNumOperands() > CallerContextLength) {
+      LongerNotColdContextKept = true;
+      break;
+    }
+  }
+  // Don't need to emit any for the immediate caller if we already have
+  // longer overlapping contexts.
+  bool KeepFirstNewNotCold = !LongerNotColdContextKept;
+  auto NewColdMIBNodes = make_filter_range(NewMIBNodes, [&](const Metadata *M) {
+    auto MIBMD = cast<MDNode>(M);
+    // Only keep cold contexts and first (longest non-cold context).
+    if (getMIBAllocType(MIBMD) != AllocationType::Cold) {
+      MDNode *StackMD = getMIBStackNode(MIBMD);
+      assert(StackMD);
+      // Keep any already kept for longer contexts.
+      if (StackMD->getNumOperands() > CallerContextLength)
+        return true;
+      // Otherwise keep the first one added by the immediate caller if there
+      // were no longer contexts.
+      if (KeepFirstNewNotCold) {
+        KeepFirstNewNotCold = false;
+        return true;
+      }
+      if (MemProfReportHintedSizes)
+        EmitMessageForRemovedContexts(MIBMD, "pruned", "");
+      return false;
+    }
+    return true;
+  });
+  for (auto *M : NewColdMIBNodes)
+    SavedMIBNodes.push_back(M);
+}
+
 // Recursive helper to trim contexts and create metadata nodes.
 // Caller should have pushed Node's loc to MIBCallStack. Doing this in the
 // caller makes it simpler to handle the many early returns in this method.
 bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
                                   std::vector<uint64_t> &MIBCallStack,
                                   std::vector<Metadata *> &MIBNodes,
-                                  bool CalleeHasAmbiguousCallerContext,
-                                  bool &CalleeDeepestAmbiguousAllocType) {
+                                  bool CalleeHasAmbiguousCallerContext) {
   // Trim context below the first node in a prefix with a single alloc type.
   // Add an MIB record for the current call stack prefix.
   if (hasSingleAllocType(Node->AllocTypes)) {
-    // Because we only clone cold contexts (we don't clone for exposing NotCold
-    // contexts as that is the default allocation behavior), we create MIB
-    // metadata for this context if any of the following are true:
-    // 1) It is cold.
-    // 2) The immediate callee is the deepest point where we have an ambiguous
-    //    allocation type (i.e. the other callers that are cold need to know
-    //    that we have a not cold context overlapping to this point so that we
-    //    know how deep to clone).
-    // 3) MemProfKeepAllNotColdContexts is enabled, which is useful if we are
-    //    reporting hinted sizes, and want to get information from the indexing
-    //    step for all contexts, or have specified a value less than 100% for
-    //    -memprof-cloning-cold-threshold.
-    if (Node->hasAllocType(AllocationType::Cold) ||
-        CalleeDeepestAmbiguousAllocType || MemProfKeepAllNotColdContexts) {
-      std::vector<ContextTotalSize> ContextSizeInfo;
-      collectContextSizeInfo(Node, ContextSizeInfo);
-      MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack,
-                                       (AllocationType)Node->AllocTypes,
-                                       ContextSizeInfo));
-      // If we just emitted an MIB for a not cold caller, don't need to emit
-      // another one for the callee to correctly disambiguate its cold callers.
-      if (!Node->hasAllocType(AllocationType::Cold))
-        CalleeDeepestAmbiguousAllocType = false;
-    }
+    std::vector<ContextTotalSize> ContextSizeInfo;
+    collectContextSizeInfo(Node, ContextSizeInfo);
+    MIBNodes.push_back(createMIBNode(
+        Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo));
     return true;
   }
 
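
// --- Illustrative sketch (not part of the commit): the pruning rule of
// SaveFilteredNewMIBNodes, restated over plain data with a hypothetical MIB
// struct standing in for the metadata nodes. Cold contexts always survive;
// a not-cold context survives only if it is longer than CallerContextLength
// (i.e. it was already kept by a deeper recursion step), or if it is the
// first not-cold context at this depth and no longer one exists.
#include <cstdint>
#include <vector>

struct MIB {
  std::vector<uint64_t> Stack; // Alloc-to-root call stack ids.
  bool IsCold;
};

void filterNewMIBs(const std::vector<MIB> &NewMIBs, std::vector<MIB> &Saved,
                   unsigned CallerContextLength) {
  // First pass: was any not-cold context already kept for a deeper caller?
  bool LongerNotColdKept = false;
  for (const MIB &M : NewMIBs)
    if (!M.IsCold && M.Stack.size() > CallerContextLength) {
      LongerNotColdKept = true;
      break;
    }
  bool KeepFirstNewNotCold = !LongerNotColdKept;
  for (const MIB &M : NewMIBs) {
    // Cold contexts, and not-cold ones kept for deeper callers, pass through.
    if (M.IsCold || M.Stack.size() > CallerContextLength) {
      Saved.push_back(M);
      continue;
    }
    if (KeepFirstNewNotCold) { // First not-cold added at this depth.
      KeepFirstNewNotCold = false;
      Saved.push_back(M);
    } // Any later not-cold context at this depth is pruned.
  }
}
// On the worked example in the comment above: filtering {1,2,4 (cold),
// 1,2,5 (notcold), 1,2,6 (notcold)} with CallerContextLength = 3 keeps
// 1,2,4 and 1,2,5 and prunes 1,2,6.
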
@@ -291,14 +358,21 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   if (!Node->Callers.empty()) {
     bool NodeHasAmbiguousCallerContext = Node->Callers.size() > 1;
     bool AddedMIBNodesForAllCallerContexts = true;
+    // Accumulate all new MIB nodes by the recursive calls below into a vector
+    // that will later be filtered before adding to the caller's MIBNodes
+    // vector.
+    std::vector<Metadata *> NewMIBNodes;
     for (auto &Caller : Node->Callers) {
       MIBCallStack.push_back(Caller.first);
-      AddedMIBNodesForAllCallerContexts &= buildMIBNodes(
-          Caller.second, Ctx, MIBCallStack, MIBNodes,
-          NodeHasAmbiguousCallerContext, Node->DeepestAmbiguousAllocType);
+      AddedMIBNodesForAllCallerContexts &=
+          buildMIBNodes(Caller.second, Ctx, MIBCallStack, NewMIBNodes,
+                        NodeHasAmbiguousCallerContext);
       // Remove Caller.
       MIBCallStack.pop_back();
     }
+    // Pass in the stack length of the MIB nodes added for the immediate caller,
+    // which is the current stack length plus 1.
+    SaveFilteredNewMIBNodes(NewMIBNodes, MIBNodes, MIBCallStack.size() + 1);
     if (AddedMIBNodesForAllCallerContexts)
       return true;
     // We expect that the callers should be forced to add MIBs to disambiguate
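
// --- Illustrative sketch (not part of the commit): the accumulate-then-filter
// recursion shape introduced above, reusing the hypothetical TrieNode, MIB,
// and filterNewMIBs definitions from the earlier sketches. Each level gathers
// its callers' MIB records into a local vector and filters them once, passing
// the context length of the immediate callers (current depth plus 1).
bool hasSingleType(uint8_t Types) { // Exactly one bit set in the mask.
  return Types != 0 && (Types & (Types - 1)) == 0;
}

bool buildMIBs(TrieNode *Node, std::vector<uint64_t> &Stack,
               std::vector<MIB> &Out) {
  // Trim the context below the first node with a single alloc type.
  if (hasSingleType(Node->AllocTypes)) {
    Out.push_back({Stack, Node->AllocTypes == Cold});
    return true;
  }
  if (Node->Callers.empty())
    return false; // Ambiguous type with no callers left to disambiguate it.
  bool AddedForAllCallers = true;
  std::vector<MIB> New; // Accumulate before filtering, as in the commit.
  for (auto &[StackId, Caller] : Node->Callers) {
    Stack.push_back(StackId);
    AddedForAllCallers &= buildMIBs(Caller, Stack, New);
    Stack.pop_back();
  }
  filterNewMIBs(New, Out, Stack.size() + 1);
  return AddedForAllCallers;
}
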
@@ -372,13 +446,8 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
   // The CalleeHasAmbiguousCallerContext flag is meant to say whether the
   // callee of the given node has more than one caller. Here the node being
   // passed in is the alloc and it has no callees. So it's false.
-  // Similarly, the last parameter is meant to say whether the callee of the
-  // given node is the deepest point where we have ambiguous alloc types, which
-  // is also false as the alloc has no callees.
-  bool DeepestAmbiguousAllocType = true;
   if (buildMIBNodes(Alloc, Ctx, MIBCallStack, MIBNodes,
-                    /*CalleeHasAmbiguousCallerContext=*/false,
-                    DeepestAmbiguousAllocType)) {
+                    /*CalleeHasAmbiguousCallerContext=*/false)) {
     assert(MIBCallStack.size() == 1 &&
            "Should only be left with Alloc's location in stack");
     CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
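
// --- Illustrative sketch (not part of the commit): driving the simplified
// recursion above over the worked example from the comments. Hypothetical
// helpers throughout; like buildAndAttachMIBMetadata, the stack is seeded
// with the alloc's own id before recursing.
#include <iostream>

void addContext(TrieNode &Root, const std::vector<uint64_t> &Frames,
                uint8_t Type) {
  Root.AllocTypes |= Type;
  TrieNode *Curr = &Root;
  for (unsigned I = 1; I < Frames.size(); ++I) // Frames[0] is the alloc.
    Curr = addCaller(Curr, Frames[I], Type);
}

int main() {
  TrieNode Alloc;
  addContext(Alloc, {1, 3}, NotCold);
  addContext(Alloc, {1, 2, 4}, Cold);
  addContext(Alloc, {1, 2, 5}, NotCold);
  addContext(Alloc, {1, 2, 6}, NotCold);
  std::vector<uint64_t> Stack{1}; // Alloc's own location, pushed by caller.
  std::vector<MIB> MIBs;
  if (buildMIBs(&Alloc, Stack, MIBs))
    for (const MIB &M : MIBs) { // Expect 1 2 4 (cold) and 1 2 5 (notcold).
      for (uint64_t Id : M.Stack)
        std::cout << Id << ' ';
      std::cout << (M.IsCold ? "(cold)" : "(notcold)") << '\n';
    }
}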