7
7
// ===----------------------------------------------------------------------===//
8
8
9
9
#include " CtxInstrProfiling.h"
10
+ #include " RootAutoDetector.h"
10
11
#include " sanitizer_common/sanitizer_allocator_internal.h"
11
12
#include " sanitizer_common/sanitizer_atomic.h"
12
13
#include " sanitizer_common/sanitizer_atomic_clang.h"
@@ -43,6 +44,12 @@ Arena *FlatCtxArena = nullptr;
43
44
__thread bool IsUnderContext = false ;
44
45
__sanitizer::atomic_uint8_t ProfilingStarted = {};
45
46
47
// Atomic word holding the address of a RootAutoDetector as an integer. It is
// value-initialized, so getRootDetector() yields nullptr until some other code
// stores an address here. This variable is handed to the RootAutoDetector
// constructor in __llvm_ctx_profile_start_collection; presumably the detector
// publishes/clears itself through it - confirm in RootAutoDetector.h.
+ __sanitizer::atomic_uintptr_t RootDetector = {};
48
// Returns the current contents of `RootDetector` reinterpreted as a
// RootAutoDetector pointer (nullptr when the stored word is zero). The load is
// relaxed, i.e. it imposes no ordering on surrounding memory accesses.
+ RootAutoDetector *getRootDetector () {
49
+ return reinterpret_cast <RootAutoDetector *>(
50
+ __sanitizer::atomic_load_relaxed (&RootDetector));
51
+ }
52
+
46
53
// utility to taint a pointer by setting the LSB. There is an assumption
47
54
// throughout that the addresses of contexts are even (really, they should be
48
55
// align(8), but "even"-ness is the minimum assumption)
@@ -201,7 +208,7 @@ ContextNode *getCallsiteSlow(GUID Guid, ContextNode **InsertionPoint,
201
208
return Ret;
202
209
}
203
210
204
- ContextNode *getFlatProfile (FunctionData &Data, GUID Guid,
211
+ ContextNode *getFlatProfile (FunctionData &Data, void *Callee, GUID Guid,
205
212
uint32_t NumCounters) {
206
213
if (ContextNode *Existing = Data.FlatCtx )
207
214
return Existing;
@@ -232,6 +239,7 @@ ContextNode *getFlatProfile(FunctionData &Data, GUID Guid,
232
239
auto *Ret = allocContextNode (AllocBuff, Guid, NumCounters, 0 );
233
240
Data.FlatCtx = Ret;
234
241
242
+ Data.EntryAddress = Callee;
235
243
Data.Next = reinterpret_cast <FunctionData *>(
236
244
__sanitizer::atomic_load_relaxed (&AllFunctionsData));
237
245
while (!__sanitizer::atomic_compare_exchange_strong (
@@ -296,8 +304,9 @@ ContextNode *tryStartContextGivenRoot(ContextRoot *Root, GUID Guid,
296
304
return TheScratchContext;
297
305
}
298
306
299
- ContextNode *getUnhandledContext (FunctionData &Data, GUID Guid,
300
- uint32_t NumCounters) {
307
+ ContextNode *getUnhandledContext (FunctionData &Data, void *Callee, GUID Guid,
308
+ uint32_t NumCounters, uint32_t NumCallsites,
309
+ ContextRoot *CtxRoot) {
301
310
302
311
// 1) if we are currently collecting a contextual profile, fetch a ContextNode
303
312
// in the `Unhandled` set. We want to do this regardless of `ProfilingStarted`
@@ -316,27 +325,32 @@ ContextNode *getUnhandledContext(FunctionData &Data, GUID Guid,
316
325
// entered once and never exit. They should be assumed to be entered before
317
326
// profiling starts - because profiling should start after the server is up
318
327
// and running (which is equivalent to "message pumps are set up").
319
- ContextRoot *R = __llvm_ctx_profile_current_context_root;
320
- if (!R) {
328
+ if (!CtxRoot) {
329
+ if (auto *RAD = getRootDetector ())
330
+ RAD->sample ();
331
+ else if (auto *CR = Data.CtxRoot )
332
+ return tryStartContextGivenRoot (CR, Guid, NumCounters, NumCallsites);
321
333
if (IsUnderContext || !__sanitizer::atomic_load_relaxed (&ProfilingStarted))
322
334
return TheScratchContext;
323
335
else
324
336
return markAsScratch (
325
- onContextEnter (*getFlatProfile (Data, Guid, NumCounters)));
337
+ onContextEnter (*getFlatProfile (Data, Callee, Guid, NumCounters)));
326
338
}
327
- auto [Iter, Ins] = R ->Unhandled .insert ({Guid, nullptr });
339
+ auto [Iter, Ins] = CtxRoot ->Unhandled .insert ({Guid, nullptr });
328
340
if (Ins)
329
- Iter->second =
330
- getCallsiteSlow (Guid, &R-> FirstUnhandledCalleeNode , NumCounters, 0 );
341
+ Iter->second = getCallsiteSlow (Guid, &CtxRoot-> FirstUnhandledCalleeNode ,
342
+ NumCounters, 0 );
331
343
return markAsScratch (onContextEnter (*Iter->second ));
332
344
}
333
345
334
346
ContextNode *__llvm_ctx_profile_get_context (FunctionData *Data, void *Callee,
335
347
GUID Guid, uint32_t NumCounters,
336
348
uint32_t NumCallsites) {
349
+ auto *CtxRoot = __llvm_ctx_profile_current_context_root;
337
350
// fast "out" if we're not even doing contextual collection.
338
- if (!__llvm_ctx_profile_current_context_root)
339
- return getUnhandledContext (*Data, Guid, NumCounters);
351
+ if (!CtxRoot)
352
+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
353
+ nullptr );
340
354
341
355
// also fast "out" if the caller is scratch. We can see if it's scratch by
342
356
// looking at the interior pointer into the subcontexts vector that the caller
@@ -345,7 +359,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
345
359
// precisely, aligned - 8 values)
346
360
auto **CallsiteContext = consume (__llvm_ctx_profile_callsite[0 ]);
347
361
if (!CallsiteContext || isScratch (CallsiteContext))
348
- return getUnhandledContext (*Data, Guid, NumCounters);
362
+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
363
+ CtxRoot);
349
364
350
365
// if the callee isn't the expected one, return scratch.
351
366
// Signal handler(s) could have been invoked at any point in the execution.
@@ -363,7 +378,8 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
363
378
// for that case.
364
379
auto *ExpectedCallee = consume (__llvm_ctx_profile_expected_callee[0 ]);
365
380
if (ExpectedCallee != Callee)
366
- return getUnhandledContext (*Data, Guid, NumCounters);
381
+ return getUnhandledContext (*Data, Callee, Guid, NumCounters, NumCallsites,
382
+ CtxRoot);
367
383
368
384
auto *Callsite = *CallsiteContext;
369
385
// in the case of indirect calls, we will have all seen targets forming a
@@ -388,21 +404,23 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
388
404
// Starts a context for the function described by `FData`: obtains the
// function's ContextRoot through FData->getOrAllocateContextRoot() (defined
// elsewhere) and forwards it, together with `Guid`, `Counters` and
// `Callsites`, to tryStartContextGivenRoot. The result of that call is
// returned unchanged.
ContextNode *__llvm_ctx_profile_start_context (FunctionData *FData, GUID Guid,
389
405
uint32_t Counters,
390
406
uint32_t Callsites) {
407
+
391
408
return tryStartContextGivenRoot (FData->getOrAllocateContextRoot (), Guid,
392
409
Counters, Callsites);
393
410
}
394
411
395
412
// Releases the context root associated with `FData`.
//
// Added ('+') variant: does nothing unless a current context root exists AND
// it is the very root owned by `FData`; only then does it clear
// IsUnderContext, reset the current root to nullptr and unlock the root's
// `Taken` lock.
// Removed ('-') variant: cleared IsUnderContext unconditionally and unlocked
// whenever any current root was set, without checking that it belonged to
// `FData`.
void __llvm_ctx_profile_release_context (FunctionData *FData)
396
413
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
414
// Snapshot the current root once so the guard below and the release act on
// the same value.
+ const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
415
// Not collecting under any root, or the active root belongs to a different
// function: nothing to release here.
+ if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
416
+ return ;
397
417
IsUnderContext = false ;
398
- if (__llvm_ctx_profile_current_context_root) {
399
- __llvm_ctx_profile_current_context_root = nullptr ;
400
- assert (FData->CtxRoot );
401
- FData->CtxRoot ->Taken .Unlock ();
402
- }
418
// Given the guard above, FData->CtxRoot equals the non-null CurrentRoot, so
// this assert only documents the invariant.
+ assert (FData->CtxRoot );
419
+ __llvm_ctx_profile_current_context_root = nullptr ;
420
+ FData->CtxRoot ->Taken .Unlock ();
403
421
}
404
422
405
- void __llvm_ctx_profile_start_collection () {
423
+ void __llvm_ctx_profile_start_collection (unsigned AutodetectDuration ) {
406
424
size_t NumMemUnits = 0 ;
407
425
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock (
408
426
&AllContextsMutex);
@@ -418,12 +436,28 @@ void __llvm_ctx_profile_start_collection() {
418
436
resetContextNode (*Root->FirstUnhandledCalleeNode );
419
437
__sanitizer::atomic_store_relaxed (&Root->TotalEntries , 0 );
420
438
}
439
+ if (AutodetectDuration) {
440
+ // we leak RD intentionally. Knowing when to free it is tricky, there's a
441
+ // race condition with functions observing the `RootDetector` as non-null.
442
+ // This can be addressed but the alternatives have some added complexity and
443
+ // it's not (yet) worth it.
444
+ auto *RD = new (__sanitizer::InternalAlloc (sizeof (RootAutoDetector)))
445
+ RootAutoDetector (AllFunctionsData, RootDetector, AutodetectDuration);
446
+ RD->start ();
447
+ } else {
448
+ __sanitizer::Printf (" [ctxprof] Initial NumMemUnits: %zu \n " , NumMemUnits);
449
+ }
421
450
__sanitizer::atomic_store_relaxed (&ProfilingStarted, true );
422
- __sanitizer::Printf (" [ctxprof] Initial NumMemUnits: %zu \n " , NumMemUnits);
423
451
}
424
452
425
453
bool __llvm_ctx_profile_fetch (ProfileWriter &Writer) {
426
454
__sanitizer::atomic_store_relaxed (&ProfilingStarted, false );
455
+ if (auto *RD = getRootDetector ()) {
456
+ __sanitizer::Printf (" [ctxprof] Expected the root autodetector to have "
457
+ " finished well before attempting to fetch a context" );
458
+ RD->join ();
459
+ }
460
+
427
461
__sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock (
428
462
&AllContextsMutex);
429
463
@@ -448,8 +482,9 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
448
482
const auto *Pos = reinterpret_cast <const FunctionData *>(
449
483
__sanitizer::atomic_load_relaxed (&AllFunctionsData));
450
484
for (; Pos; Pos = Pos->Next )
451
- Writer.writeFlat (Pos->FlatCtx ->guid (), Pos->FlatCtx ->counters (),
452
- Pos->FlatCtx ->counters_size ());
485
+ if (!Pos->CtxRoot )
486
+ Writer.writeFlat (Pos->FlatCtx ->guid (), Pos->FlatCtx ->counters (),
487
+ Pos->FlatCtx ->counters_size ());
453
488
Writer.endFlatSection ();
454
489
return true ;
455
490
}
0 commit comments