-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ctxprof] Handle instrumenting functions with musttail
calls
#135121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
✅ With the latest revision this PR passed the C/C++ code formatter. |
e651f76
to
81e5053
Compare
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Mircea Trofin (mtrofin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/135121.diff 8 Files Affected:
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
index 55423d95b3088..e4e310b2e987d 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrContextNode.h
@@ -125,10 +125,11 @@ class ContextNode final {
/// VOLATILE_PTRDECL is the same as above, but for volatile pointers;
///
/// MUTEXDECL takes one parameter, the name of a field that is a mutex.
-#define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL) \
+#define CTXPROF_FUNCTION_DATA(PTRDECL, CONTEXT_PTR, VOLATILE_PTRDECL, \
+ MUTEXDECL) \
PTRDECL(FunctionData, Next) \
VOLATILE_PTRDECL(void, EntryAddress) \
- VOLATILE_PTRDECL(ContextRoot, CtxRoot) \
+ CONTEXT_PTR \
VOLATILE_PTRDECL(ContextNode, FlatCtx) \
MUTEXDECL(Mutex)
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
index 4cf852fe3f667..8b4eb2c2611b5 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.cpp
@@ -272,6 +272,8 @@ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NumCounters,
ContextRoot *FunctionData::getOrAllocateContextRoot() {
auto *Root = CtxRoot;
+ if (mustNotBeRoot(Root))
+ return Root;
if (Root)
return Root;
__sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> L(&Mutex);
@@ -328,8 +330,10 @@ ContextNode *getUnhandledContext(FunctionData &Data, void *Callee, GUID Guid,
if (!CtxRoot) {
if (auto *RAD = getRootDetector())
RAD->sample();
- else if (auto *CR = Data.CtxRoot)
- return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
+ else if (auto *CR = Data.CtxRoot) {
+ if (!mustNotBeRoot(CR))
+ return tryStartContextGivenRoot(CR, Guid, NumCounters, NumCallsites);
+ }
if (IsUnderContext || !__sanitizer::atomic_load_relaxed(&ProfilingStarted))
return TheScratchContext;
else
@@ -404,20 +408,21 @@ ContextNode *__llvm_ctx_profile_get_context(FunctionData *Data, void *Callee,
ContextNode *__llvm_ctx_profile_start_context(FunctionData *FData, GUID Guid,
uint32_t Counters,
uint32_t Callsites) {
-
- return tryStartContextGivenRoot(FData->getOrAllocateContextRoot(), Guid,
- Counters, Callsites);
+ auto *Root = FData->getOrAllocateContextRoot();
+ assert(!mustNotBeRoot(Root));
+ return tryStartContextGivenRoot(Root, Guid, Counters, Callsites);
}
void __llvm_ctx_profile_release_context(FunctionData *FData)
SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
const auto *CurrentRoot = __llvm_ctx_profile_current_context_root;
- if (!CurrentRoot || FData->CtxRoot != CurrentRoot)
+ auto *CR = FData->CtxRoot;
+ if (!CurrentRoot || CR != CurrentRoot)
return;
IsUnderContext = false;
- assert(FData->CtxRoot);
+ assert(CR && !mustNotBeRoot(CR));
__llvm_ctx_profile_current_context_root = nullptr;
- FData->CtxRoot->Taken.Unlock();
+ CR->Taken.Unlock();
}
void __llvm_ctx_profile_start_collection(unsigned AutodetectDuration) {
@@ -481,10 +486,13 @@ bool __llvm_ctx_profile_fetch(ProfileWriter &Writer) {
// traversing it.
const auto *Pos = reinterpret_cast<const FunctionData *>(
__sanitizer::atomic_load_relaxed(&AllFunctionsData));
- for (; Pos; Pos = Pos->Next)
- if (!Pos->CtxRoot)
- Writer.writeFlat(Pos->FlatCtx->guid(), Pos->FlatCtx->counters(),
- Pos->FlatCtx->counters_size());
+ for (; Pos; Pos = Pos->Next) {
+ const auto *CR = Pos->CtxRoot;
+ if (!CR && !mustNotBeRoot(CR)) {
+ const auto *FP = Pos->FlatCtx;
+ Writer.writeFlat(FP->guid(), FP->counters(), FP->counters_size());
+ }
+ }
Writer.endFlatSection();
return true;
}
diff --git a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
index 4983f086d230d..df23389d2b3e5 100644
--- a/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
+++ b/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h
@@ -145,7 +145,9 @@ struct FunctionData {
#define _PTRDECL(T, N) T *N = nullptr;
#define _VOLATILE_PTRDECL(T, N) T *volatile N = nullptr;
#define _MUTEXDECL(N) ::__sanitizer::SpinMutex N;
- CTXPROF_FUNCTION_DATA(_PTRDECL, _VOLATILE_PTRDECL, _MUTEXDECL)
+#define _CONTEXT_PTR ContextRoot *CtxRoot = nullptr;
+ CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_PTR, _VOLATILE_PTRDECL, _MUTEXDECL)
+#undef _CONTEXT_PTR
#undef _PTRDECL
#undef _VOLATILE_PTRDECL
#undef _MUTEXDECL
@@ -167,6 +169,8 @@ inline bool isScratch(const void *Ctx) {
return (reinterpret_cast<uint64_t>(Ctx) & 1);
}
+inline bool mustNotBeRoot(const ContextRoot *Ctx) { return isScratch(Ctx); }
+
} // namespace __ctx_profile
extern "C" {
diff --git a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
index 4aa169e202ea3..2256a2d0c1079 100644
--- a/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
+++ b/compiler-rt/lib/ctx_profile/RootAutoDetector.cpp
@@ -66,7 +66,16 @@ void RootAutoDetector::start() {
atomic_load_relaxed(&RAD->FunctionDataListHead));
FD; FD = FD->Next) {
if (AllRoots.contains(reinterpret_cast<uptr>(FD->EntryAddress))) {
- FD->getOrAllocateContextRoot();
+ if (!mustNotBeRoot(FD->CtxRoot)) {
+ FD->getOrAllocateContextRoot();
+ } else {
+ // FIXME: address this by informing the root detection algorithm
+ // to skip over such functions and pick the next down in the
+ // stack. At that point, this becomes an assert.
+ Printf("[ctxprof] Root auto-detector selected a musttail "
+ "function for root (%p). Ignoring\n",
+ FD->EntryAddress);
+ }
}
}
atomic_store_relaxed(&RAD->Self, 0);
diff --git a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
index 83756fed0d6e6..31afa4f697a34 100644
--- a/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
+++ b/compiler-rt/lib/ctx_profile/tests/CtxInstrProfilingTest.cpp
@@ -301,3 +301,13 @@ TEST_F(ContextTest, Dump) {
EXPECT_EQ(W2.FlatsWritten, 1);
EXPECT_EQ(W2.ExitedFlatCount, 1);
}
+
+TEST_F(ContextTest, MustNotBeRoot) {
+ FunctionData FData;
+ FData.CtxRoot = reinterpret_cast<ContextRoot *>(1U);
+ int FakeCalleeAddress = 0;
+ __llvm_ctx_profile_start_collection();
+ auto *Subctx =
+ __llvm_ctx_profile_get_context(&FData, &FakeCalleeAddress, 2, 3, 1);
+ EXPECT_TRUE(isScratch(Subctx));
+}
\ No newline at end of file
diff --git a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h
index 55423d95b3088..e4e310b2e987d 100644
--- a/llvm/include/llvm/ProfileData/CtxInstrContextNode.h
+++ b/llvm/include/llvm/ProfileData/CtxInstrContextNode.h
@@ -125,10 +125,11 @@ class ContextNode final {
/// VOLATILE_PTRDECL is the same as above, but for volatile pointers;
///
/// MUTEXDECL takes one parameter, the name of a field that is a mutex.
-#define CTXPROF_FUNCTION_DATA(PTRDECL, VOLATILE_PTRDECL, MUTEXDECL) \
+#define CTXPROF_FUNCTION_DATA(PTRDECL, CONTEXT_PTR, VOLATILE_PTRDECL, \
+ MUTEXDECL) \
PTRDECL(FunctionData, Next) \
VOLATILE_PTRDECL(void, EntryAddress) \
- VOLATILE_PTRDECL(ContextRoot, CtxRoot) \
+ CONTEXT_PTR \
VOLATILE_PTRDECL(ContextNode, FlatCtx) \
MUTEXDECL(Mutex)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index a314457423819..f99d7b9d03e02 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -12,9 +12,11 @@
#include "llvm/Analysis/CtxProfAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Analysis.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -55,7 +57,7 @@ class CtxInstrumentationLowerer final {
Module &M;
ModuleAnalysisManager &MAM;
Type *ContextNodeTy = nullptr;
- Type *FunctionDataTy = nullptr;
+ StructType *FunctionDataTy = nullptr;
DenseSet<const Function *> ContextRootSet;
Function *StartCtx = nullptr;
@@ -63,6 +65,7 @@ class CtxInstrumentationLowerer final {
Function *ReleaseCtx = nullptr;
GlobalVariable *ExpectedCalleeTLS = nullptr;
GlobalVariable *CallsiteInfoTLS = nullptr;
+ Constant *CannotBeRootInitializer = nullptr;
public:
CtxInstrumentationLowerer(Module &M, ModuleAnalysisManager &MAM);
@@ -117,12 +120,29 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
#define _PTRDECL(_, __) PointerTy,
#define _VOLATILE_PTRDECL(_, __) PointerTy,
+#define _CONTEXT_ROOT PointerTy,
#define _MUTEXDECL(_) SanitizerMutexType,
FunctionDataTy = StructType::get(
- M.getContext(),
- {CTXPROF_FUNCTION_DATA(_PTRDECL, _VOLATILE_PTRDECL, _MUTEXDECL)});
+ M.getContext(), {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT,
+ _VOLATILE_PTRDECL, _MUTEXDECL)});
#undef _PTRDECL
+#undef _CONTEXT_ROOT
+#undef _VOLATILE_PTRDECL
+#undef _MUTEXDECL
+
+#define _PTRDECL(_, __) Constant::getNullValue(PointerTy),
+#define _VOLATILE_PTRDECL(_, __) _PTRDECL(_, __)
+#define _MUTEXDECL(_) Constant::getNullValue(SanitizerMutexType),
+#define _CONTEXT_ROOT \
+ Constant::getIntegerValue( \
+ PointerTy, \
+ APInt(M.getDataLayout().getPointerTypeSizeInBits(PointerTy), 1U)),
+ CannotBeRootInitializer = ConstantStruct::get(
+ FunctionDataTy, {CTXPROF_FUNCTION_DATA(_PTRDECL, _CONTEXT_ROOT,
+ _VOLATILE_PTRDECL, _MUTEXDECL)});
+#undef _PTRDECL
+#undef _CONTEXT_ROOT
#undef _VOLATILE_PTRDECL
#undef _MUTEXDECL
@@ -134,8 +154,8 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
I32Ty, /*NumCallsites*/
});
- // Define a global for each entrypoint. We'll reuse the entrypoint's name as
- // prefix. We assume the entrypoint names to be unique.
+ // Define a global for each entrypoint. We'll reuse the entrypoint's name
+ // as prefix. We assume the entrypoint names to be unique.
for (const auto &Fname : ContextRoots) {
if (const auto *F = M.getFunction(Fname)) {
if (F->isDeclaration())
@@ -145,10 +165,10 @@ CtxInstrumentationLowerer::CtxInstrumentationLowerer(Module &M,
for (const auto &I : BB)
if (const auto *CB = dyn_cast<CallBase>(&I))
if (CB->isMustTailCall()) {
- M.getContext().emitError(
- "The function " + Fname +
- " was indicated as a context root, but it features musttail "
- "calls, which is not supported.");
+ M.getContext().emitError("The function " + Fname +
+ " was indicated as a context root, "
+ "but it features musttail "
+ "calls, which is not supported.");
}
}
}
@@ -240,6 +260,14 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
return false;
}();
+ if (HasMusttail && ContextRootSet.contains(&F)) {
+ F.getContext().emitError(
+ "[ctx_prof] A function with musttail calls was explicitly requested as "
+ "root. That is not supported because we cannot instrument a return "
+ "instruction to release the context: " +
+ F.getName());
+ return false;
+ }
auto &Head = F.getEntryBlock();
for (auto &I : Head) {
// Find the increment intrinsic in the entry basic block.
@@ -263,9 +291,14 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
// regular function)
// Don't set a name, they end up taking a lot of space and we don't need
// them.
+
+ // Zero-initialize the FunctionData, except for functions that have
+ // musttail calls. There, we set the CtxRoot field to 1, which will be
+ // treated as a "can't be set as root".
TheRootFuctionData = new GlobalVariable(
M, FunctionDataTy, false, GlobalVariable::InternalLinkage,
- Constant::getNullValue(FunctionDataTy));
+ HasMusttail ? CannotBeRootInitializer
+ : Constant::getNullValue(FunctionDataTy));
if (ContextRootSet.contains(&F)) {
Context = Builder.CreateCall(
@@ -366,10 +399,6 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
}
}
}
- // FIXME: This would happen if the entrypoint tailcalls. A way to fix would be
- // to disallow this, (so this then stays as an error), another is to detect
- // that and then do a wrapper or disallow the tail call. This only affects
- // instrumentation, when we want to detect the call graph.
if (!HasMusttail && !ContextWasReleased)
F.getContext().emitError(
"[ctx_prof] A function that doesn't have musttail calls was "
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
index 8f72711a9c8b1..6b2f25a585ec3 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -18,7 +18,7 @@ declare void @bar()
; LOWERING: @[[GLOB4:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
; LOWERING: @[[GLOB5:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
; LOWERING: @[[GLOB6:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
-; LOWERING: @[[GLOB7:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } zeroinitializer
+; LOWERING: @[[GLOB7:[0-9]+]] = internal global { ptr, ptr, ptr, ptr, i8 } { ptr null, ptr null, ptr inttoptr (i64 1 to ptr), ptr null, i8 0 }
;.
define void @foo(i32 %a, ptr %fct) {
; INSTRUMENT-LABEL: define void @foo(
|
musttail
calls
739bc3d
to
485788d
Compare
ilovepi
reviewed
Apr 11, 2025
snehasish
approved these changes
Apr 12, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
7420f38
to
3a9a271
Compare
3a9a271
to
5d3c9e9
Compare
bcardosolopes
added a commit
to bcardosolopes/llvm-project
that referenced
this pull request
Apr 14, 2025
* origin/main: (199 commits) [NFC][AsmPrinter] Refactor AsmPrinter and AArch64AsmPrinter to prepare for jump table partitions on aarch64 (llvm#125993) [HEXAGON] Fix corner cases for hwloops pass (llvm#135439) [flang] Handle volatility in lowering and codegen (llvm#135311) [MLIR][Shape] Support >2 args in `shape.broadcast` folder (llvm#126808) [DirectX] Use scalar arguments for @llvm.dx.dot intrinsics (llvm#134570) Remove the redundant check for "WeakPtr" in isSmartPtrClass to fix the issue 135612. (llvm#135629) [BOLT] Support relative vtable (llvm#135449) [flang] Fix linking to libMLIR (llvm#135483) [AsmPrinter] Link .section_sizes to the correct section (llvm#135583) [ctxprof] Handle instrumenting functions with `musttail` calls (llvm#135121) [SystemZ] Consider VST/VL as SimpleBDXStore/Load (llvm#135623) [libc++][CI] Pin the XCode version. (llvm#135412) [lldb-dap] Fix win32 build. (llvm#135638) [Interp] Mark inline-virtual.cpp as unsupported with ASan (llvm#135402) [libc++] Removes the _LIBCPP_VERBOSE_ABORT_NOT_NOEXCEPT macro. (llvm#135494) [CVP] Add tests for ucmp/scmp with switch (NFC) [mlir][tosa] Align AbsOp example variable names (llvm#135268) [mlir][tosa] Align AddOp examples to spec (llvm#135266) [mlir][tosa] Align RFFT2d and FFT2d operator examples (llvm#135261) [flang][OpenMP][HLFIR] Support vector subscripted array sections for DEPEND (llvm#133892) ...
var-const
pushed a commit
to ldionne/llvm-project
that referenced
this pull request
Apr 17, 2025
…135121) Functions with `musttail` calls can't be roots because we can't instrument their `ret` to release the context. This patch tags their `CtxRoot` field in their `FunctionData`. In compiler-rt we then know not to allow such functions become roots, and also not confuse `CtxRoot == 0x1` with there being a context root. Currently we also lose the context tree under such cases. We can, in a subsequent patch, have the root detector search past these functions.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Functions with
musttail
calls can't be roots because we can't instrument theirret
to release the context. This patch tags theirCtxRoot
field in theirFunctionData
. In compiler-rt we then know not to allow such functions become roots, and also not confuseCtxRoot == 0x1
with there being a context root.Currently we also lose the context tree under such cases. We can, in a subsequent patch, have the root detector search past these functions.