Skip to content

Commit 7420f38

Browse files
committed
[ctxprof] Handle musttail
1 parent 27bc8a1 commit 7420f38

File tree

8 files changed

+100
-33
lines changed

8 files changed

+100
-33
lines changed

compiler-rt/lib/ctx_profile/CtxInstrContextNode.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,11 @@ class ContextNode final {
125125
/// VOLATILE_PTRDECL is the same as above, but for volatile pointers;
126126
///
127127
/// MUTEXDECL takes one parameter, the name of a field that is a mutex.
128-
#define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL) \
128+
#define CTXPROF_FUNCTION_DATA(PTRDECL, CONTEXT_PTR, VOLATILE_PTRDECL, \
129+
MUTEXDECL) \
129130
PTRDECL(FunctionData, Next) \
130131
VOLATILE_PTRDECL(void, EntryAddress) \
131-
VOLATILE_PTRDECL(ContextRoot, CtxRoot) \
132+
CONTEXT_PTR \
132133
VOLATILE_PTRDECL(ContextNode, FlatCtx) \
133134
MUTEXDECL(Mutex)
134135

compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,8 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
272272

273273
ContextRoot *FunctionData::getOrAllocateContextRoot() {
274274
auto *Root = CtxRoot;
275+
if (!canBeRoot(Root))
276+
return Root;
275277
if (Root)
276278
return Root;
277279
__sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
@@ -328,8 +330,10 @@ ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
328330
if (!CtxRoot) {
329331
if (auto *RAD = getRootDetector())
330332
RAD->sample();
331-
else if (auto *CR = Data.CtxRoot)
332-
return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
333+
else if (auto *CR = Data.CtxRoot) {
334+
if (canBeRoot(CR))
335+
return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
336+
}
333337
if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
334338
return TheScratchContext;
335339
else
@@ -404,20 +408,21 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
404408
ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
405409
uint32_t Counters,
406410
uint32_t Callsites) {
407-
408-
return tryStartContextGivenRoot(FData->getOrAllocateContextRoot(), Guid,
409-
Counters, Callsites);
411+
auto *Root = FData->getOrAllocateContextRoot();
412+
assert(canBeRoot(Root));
413+
return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
410414
}
411415

412416
void __llvm_ctx_profile_release_context(FunctionData *FData)
413417
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
414418
const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
415-
if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
419+
auto *CR = FData->CtxRoot;
420+
if (!CurrentRoot || CR != CurrentRoot)
416421
return;
417422
IsUnderContext = false;
418-
assert(FData->CtxRoot);
423+
assert(CR && canBeRoot(CR));
419424
__llvm_ctx_profile_current_context_root = nullptr;
420-
FData->CtxRoot->Taken.Unlock();
425+
CR->Taken.Unlock();
421426
}
422427

423428
void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration) {
@@ -481,10 +486,13 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
481486
// traversing it.
482487
const auto *Pos = reinterpret_cast<const FunctionData *>(
483488
__sanitizer::atomic_load_relaxed(&AllFunctionsData));
484-
for (; Pos; Pos = Pos->Next)
485-
if (!Pos->CtxRoot)
486-
Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
487-
Pos->FlatCtx->counters_size());
489+
for (; Pos; Pos = Pos->Next) {
490+
const auto *CR = Pos->CtxRoot;
491+
if (!CR && canBeRoot(CR)) {
492+
const auto *FP = Pos->FlatCtx;
493+
Writer.writeFlat(FP->guid(), FP->counters(), FP->counters_size());
494+
}
495+
}
488496
Writer.endFlatSection();
489497
return true;
490498
}

compiler-rt/lib/ctx_profile/CtxInstrProfiling.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
#include "sanitizer_common/sanitizer_mutex.h"
1515
#include <sanitizer/common_interface_defs.h>
1616

17+
#include <assert.h>
18+
1719
using namespace llvm::ctx_profile;
1820

1921
// Forward-declare for the one unittest checking Arena construction zeroes out
@@ -145,7 +147,9 @@ struct FunctionData {
145147
#define _PTRDECL(T, N) T *N = nullptr;
146148
#define _VOLATILE_PTRDECL(T, N) T *volatile N = nullptr;
147149
#define _MUTEXDECL(N) ::__sanitizer::SpinMutex N;
148-
CTXPROF_FUNCTION_DATA(_PTRDECL, _VOLATILE_PTRDECL, _MUTEXDECL)
150+
#define _CONTEXT_PTR ContextRoot *CtxRoot = nullptr;
151+
CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_PTR, _VOLATILE_PTRDECL, _MUTEXDECL)
152+
#undef _CONTEXT_PTR
149153
#undef _PTRDECL
150154
#undef _VOLATILE_PTRDECL
151155
#undef _MUTEXDECL
@@ -167,6 +171,11 @@ inline bool isScratch(const void *Ctx) {
167171
return (reinterpret_cast<uint64_t>(Ctx) & 1);
168172
}
169173

174+
// True if Ctx is either nullptr or not the 0x1 value.
175+
inline bool canBeRoot(const ContextRoot *Ctx) {
176+
return reinterpret_cast<uintptr_t>(Ctx) != 1U;
177+
}
178+
170179
} // namespace __ctx_profile
171180

172181
extern "C" {

compiler-rt/lib/ctx_profile/RootAutoDetector.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,16 @@ void RootAutoDetector::start() {
6666
atomic_load_relaxed(&RAD->FunctionDataListHead));
6767
FD; FD = FD->Next) {
6868
if (AllRoots.contains(reinterpret_cast<uptr>(FD->EntryAddress))) {
69-
FD->getOrAllocateContextRoot();
69+
if (canBeRoot(FD->CtxRoot)) {
70+
FD->getOrAllocateContextRoot();
71+
} else {
72+
// FIXME: address this by informing the root detection algorithm
73+
// to skip over such functions and pick the next down in the
74+
// stack. At that point, this becomes an assert.
75+
Printf("[ctxprof] Root auto-detector selected a musttail "
76+
"function for root (%p). Ignoring\n",
77+
FD->EntryAddress);
78+
}
7079
}
7180
}
7281
atomic_store_relaxed(&RAD->Self, 0);

compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,3 +301,13 @@ TEST_F(ContextTest, Dump) {
301301
EXPECT_EQ(W2.FlatsWritten, 1);
302302
EXPECT_EQ(W2.ExitedFlatCount, 1);
303303
}
304+
305+
TEST_F(ContextTest, MustNotBeRoot) {
306+
FunctionData FData;
307+
FData.CtxRoot = reinterpret_cast<ContextRoot *>(1U);
308+
int FakeCalleeAddress = 0;
309+
__llvm_ctx_profile_start_collection();
310+
auto *Subctx =
311+
__llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
312+
EXPECT_TRUE(isScratch(Subctx));
313+
}

llvm/include/llvm/ProfileData/CtxInstrContextNode.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,11 @@ class ContextNode final {
125125
/// VOLATILE_PTRDECL is the same as above, but for volatile pointers;
126126
///
127127
/// MUTEXDECL takes one parameter, the name of a field that is a mutex.
128-
#define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL) \
128+
#define CTXPROF_FUNCTION_DATA(PTRDECL, CONTEXT_PTR, VOLATILE_PTRDECL, \
129+
MUTEXDECL) \
129130
PTRDECL(FunctionData, Next) \
130131
VOLATILE_PTRDECL(void, EntryAddress) \
131-
VOLATILE_PTRDECL(ContextRoot, CtxRoot) \
132+
CONTEXT_PTR \
132133
VOLATILE_PTRDECL(ContextNode, FlatCtx) \
133134
MUTEXDECL(Mutex)
134135

llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp

Lines changed: 43 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@
1212
#include "llvm/Analysis/CtxProfAnalysis.h"
1313
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
1414
#include "llvm/IR/Analysis.h"
15+
#include "llvm/IR/Constants.h"
1516
#include "llvm/IR/DiagnosticInfo.h"
1617
#include "llvm/IR/GlobalValue.h"
1718
#include "llvm/IR/IRBuilder.h"
19+
#include "llvm/IR/InstrTypes.h"
1820
#include "llvm/IR/Instructions.h"
1921
#include "llvm/IR/IntrinsicInst.h"
2022
#include "llvm/IR/Module.h"
@@ -55,14 +57,15 @@ class CtxInstrumentationLowerer final {
5557
Module &M;
5658
ModuleAnalysisManager &MAM;
5759
Type *ContextNodeTy = nullptr;
58-
Type *FunctionDataTy = nullptr;
60+
StructType *FunctionDataTy = nullptr;
5961

6062
DenseSet<const Function *> ContextRootSet;
6163
Function *StartCtx = nullptr;
6264
Function *GetCtx = nullptr;
6365
Function *ReleaseCtx = nullptr;
6466
GlobalVariable *ExpectedCalleeTLS = nullptr;
6567
GlobalVariable *CallsiteInfoTLS = nullptr;
68+
Constant *CannotBeRootInitializer = nullptr;
6669

6770
public:
6871
CtxInstrumentationLowerer(Module &M, ModuleAnalysisManager &MAM);
@@ -117,12 +120,29 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
117120

118121
#define _PTRDECL(_, __) PointerTy,
119122
#define _VOLATILE_PTRDECL(_, __) PointerTy,
123+
#define _CONTEXT_ROOT PointerTy,
120124
#define _MUTEXDECL(_) SanitizerMutexType,
121125

122126
FunctionDataTy = StructType::get(
123-
M.getContext(),
124-
{CTXPROF_FUNCTION_DATA(_PTRDECL, _VOLATILE_PTRDECL, _MUTEXDECL)});
127+
M.getContext(), {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT,
128+
_VOLATILE_PTRDECL, _MUTEXDECL)});
125129
#undef _PTRDECL
130+
#undef _CONTEXT_ROOT
131+
#undef _VOLATILE_PTRDECL
132+
#undef _MUTEXDECL
133+
134+
#define _PTRDECL(_, __) Constant::getNullValue(PointerTy),
135+
#define _VOLATILE_PTRDECL(_, __) _PTRDECL(_, __)
136+
#define _MUTEXDECL(_) Constant::getNullValue(SanitizerMutexType),
137+
#define _CONTEXT_ROOT \
138+
Constant::getIntegerValue( \
139+
PointerTy, \
140+
APInt(M.getDataLayout().getPointerTypeSizeInBits(PointerTy), 1U)),
141+
CannotBeRootInitializer = ConstantStruct::get(
142+
FunctionDataTy, {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT,
143+
_VOLATILE_PTRDECL, _MUTEXDECL)});
144+
#undef _PTRDECL
145+
#undef _CONTEXT_ROOT
126146
#undef _VOLATILE_PTRDECL
127147
#undef _MUTEXDECL
128148

@@ -134,8 +154,8 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
134154
I32Ty, /*NumCallsites*/
135155
});
136156

137-
// Define a global for each entrypoint. We'll reuse the entrypoint's name as
138-
// prefix. We assume the entrypoint names to be unique.
157+
// Define a global for each entrypoint. We'll reuse the entrypoint's name
158+
// as prefix. We assume the entrypoint names to be unique.
139159
for (const auto &Fname : ContextRoots) {
140160
if (const auto *F = M.getFunction(Fname)) {
141161
if (F->isDeclaration())
@@ -145,10 +165,10 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
145165
for (const auto &I : BB)
146166
if (const auto *CB = dyn_cast<CallBase>(&I))
147167
if (CB->isMustTailCall()) {
148-
M.getContext().emitError(
149-
"The function " + Fname +
150-
" was indicated as a context root, but it features musttail "
151-
"calls, which is not supported.");
168+
M.getContext().emitError("The function " + Fname +
169+
" was indicated as a context root, "
170+
"but it features musttail "
171+
"calls, which is not supported.");
152172
}
153173
}
154174
}
@@ -240,6 +260,14 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
240260
return false;
241261
}();
242262

263+
if (HasMusttail && ContextRootSet.contains(&F)) {
264+
F.getContext().emitError(
265+
"[ctx_prof] A function with musttail calls was explicitly requested as "
266+
"root. That is not supported because we cannot instrument a return "
267+
"instruction to release the context: " +
268+
F.getName());
269+
return false;
270+
}
243271
auto &Head = F.getEntryBlock();
244272
for (auto &I : Head) {
245273
// Find the increment intrinsic in the entry basic block.
@@ -263,9 +291,14 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
263291
// regular function)
264292
// Don't set a name, they end up taking a lot of space and we don't need
265293
// them.
294+
295+
// Zero-initialize the FunctionData, except for functions that have
296+
// musttail calls. There, we set the CtxRoot field to 1, which will be
297+
// treated as a "can't be set as root".
266298
TheRootFuctionData = new GlobalVariable(
267299
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
268-
Constant::getNullValue(FunctionDataTy));
300+
HasMusttail ? CannotBeRootInitializer
301+
: Constant::getNullValue(FunctionDataTy));
269302

270303
if (ContextRootSet.contains(&F)) {
271304
Context = Builder.CreateCall(
@@ -366,10 +399,6 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
366399
}
367400
}
368401
}
369-
// FIXME: This would happen if the entrypoint tailcalls. A way to fix would be
370-
// to disallow this, (so this then stays as an error), another is to detect
371-
// that and then do a wrapper or disallow the tail call. This only affects
372-
// instrumentation, when we want to detect the call graph.
373402
if (!HasMusttail && !ContextWasReleased)
374403
F.getContext().emitError(
375404
"[ctx_prof] A function that doesn't have musttail calls was "

llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ declare void @bar()
1818
; LOWERING: @[[GLOB4:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
1919
; LOWERING: @[[GLOB5:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
2020
; LOWERING: @[[GLOB6:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
21-
; LOWERING: @[[GLOB7:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
21+
; LOWERING: @[[GLOB7:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } { ptr null, ptr null, ptr inttoptr (i64 1 to ptr), ptr null, i8 0 }
2222
;.
2323
define void @foo(i32 %a, ptr %fct) {
2424
; INSTRUMENT-LABEL: define void @foo(

0 commit comments

Comments
 (0)