Skip to content

Commit 194069a

Browse files
committed
Add origin-tracking support for Unix in LLVM
1 parent abab69a commit 194069a

File tree

13 files changed

+344
-36
lines changed

13 files changed

+344
-36
lines changed

llvm/cmake/modules/HandleLLVMOptions.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ string(TOUPPER "${LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING}" uppercase_LLVM_ENABLE
201201
if( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE" )
202202
set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 )
203203
elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "COVERAGE_AND_ORIGIN" )
204-
message(FATAL_ERROR "\"COVERAGE_AND_ORIGIN\" setting for LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING currently unimplemented.")
204+
set( ENABLE_DEBUGLOC_COVERAGE_TRACKING 1 )
205+
set( ENABLE_DEBUGLOC_ORIGIN_TRACKING 1 )
205206
elseif( uppercase_LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING STREQUAL "DISABLED" OR NOT DEFINED LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING )
206207
# The DISABLED setting is default and requires no additional defines.
207208
else()

llvm/include/llvm/Config/config.h.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
and to 0 otherwise. */
2424
#cmakedefine01 ENABLE_DEBUGLOC_COVERAGE_TRACKING
2525

26+
/* Define to 1 to enable expensive tracking of the origin of debug location
27+
coverage bugs, and to 0 otherwise. */
28+
#cmakedefine01 ENABLE_DEBUGLOC_ORIGIN_TRACKING
29+
2630
/* Define to 1 to prefer forward slashes on Windows, and to 0 to prefer
2731
backslashes. */
2832
#cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH

llvm/include/llvm/IR/DebugLoc.h

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,22 @@ namespace llvm {
2626
class Function;
2727

2828
#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
29+
#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
30+
struct DbgLocOrigin {
31+
static constexpr unsigned long MaxDepth = 16;
32+
using StackTracesTy =
33+
SmallVector<std::pair<int, std::array<void *, MaxDepth>>, 0>;
34+
StackTracesTy StackTraces;
35+
DbgLocOrigin(bool ShouldCollectTrace);
36+
void addTrace();
37+
const StackTracesTy &getOriginStackTraces() const { return StackTraces; };
38+
};
39+
#else
40+
/// Empty stand-in used when origin tracking is compiled out. It accepts and
/// ignores the "should collect trace" flag so that constructors which
/// initialise this base compile identically in both configurations.
struct DbgLocOrigin {
  DbgLocOrigin(bool) {}
};
43+
#endif
44+
2945
// Used to represent different "kinds" of DebugLoc, expressing that a DebugLoc
3046
// is either ordinary, containing a valid DILocation, or otherwise describing
3147
// the reason why the DebugLoc does not contain a valid DILocation.
@@ -48,22 +64,26 @@ namespace llvm {
4864
Temporary
4965
};
5066

51-
// Extends TrackingMDNodeRef to also store a DebugLocKind, allowing Debugify
52-
// to ignore intentionally-empty DebugLocs.
53-
class DILocAndCoverageTracking : public TrackingMDNodeRef {
67+
// Extends TrackingMDNodeRef to also store a DebugLocKind and Origin,
68+
// allowing Debugify to ignore intentionally-empty DebugLocs and display the
69+
// code responsible for generating unintentionally-empty DebugLocs.
70+
// Currently we only need to track the Origin of this DILoc when using a
71+
// DebugLoc that is Normal and empty, so only collect the origin stacktrace in
72+
// those cases.
73+
class DILocAndCoverageTracking : public TrackingMDNodeRef, public DbgLocOrigin {
5474
public:
5575
DebugLocKind Kind;
5676
// Default constructor for empty DebugLocs.
5777
DILocAndCoverageTracking()
58-
: TrackingMDNodeRef(nullptr), Kind(DebugLocKind::Normal) {}
59-
// Valid or nullptr MDNode*, normal DebugLocKind.
78+
: TrackingMDNodeRef(nullptr), DbgLocOrigin(true), Kind(DebugLocKind::Normal) {}
79+
// Valid or nullptr MDNode*, normal DebugLocKind
6080
DILocAndCoverageTracking(const MDNode *Loc)
61-
: TrackingMDNodeRef(const_cast<MDNode *>(Loc)),
81+
: TrackingMDNodeRef(const_cast<MDNode *>(Loc)), DbgLocOrigin(!Loc),
6282
Kind(DebugLocKind::Normal) {}
6383
DILocAndCoverageTracking(const DILocation *Loc);
64-
// Explicit DebugLocKind, which always means a nullptr MDNode*.
84+
// Always nullptr MDNode*, any DebugLocKind
6585
DILocAndCoverageTracking(DebugLocKind Kind)
66-
: TrackingMDNodeRef(nullptr), Kind(Kind) {}
86+
: TrackingMDNodeRef(nullptr), DbgLocOrigin(Kind == DebugLocKind::Normal), Kind(Kind) {}
6787
};
6888
template <> struct simplify_type<DILocAndCoverageTracking> {
6989
using SimpleType = MDNode *;
@@ -115,6 +135,23 @@ namespace llvm {
115135
DebugLocKind getKind() const { return Loc.Kind; }
116136
#endif
117137

138+
#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
139+
#if !ENABLE_DEBUGLOC_COVERAGE_TRACKING
140+
#error Cannot enable DebugLoc origin-tracking without coverage-tracking!
141+
#endif
142+
143+
/// Returns the stack traces stored in the DbgLocOrigin part of this
/// DebugLoc's location.
const DbgLocOrigin::StackTracesTy &getOriginStackTraces() const {
  const DbgLocOrigin &Origin = Loc;
  return Origin.getOriginStackTraces();
}
146+
/// Returns a copy of this DebugLoc after calling addTrace() on the copy's
/// location; this object itself is left untouched.
DebugLoc getCopied() const {
  DebugLoc Copy(*this);
  Copy.Loc.addTrace();
  return Copy;
}
151+
#else
152+
// Origin tracking is disabled in this configuration, so there is no trace to
// add: the copy is returned unchanged.
DebugLoc getCopied() const { return *this; }
153+
#endif
154+
118155
static DebugLoc getTemporary();
119156
static DebugLoc getUnknown();
120157
static DebugLoc getLineZero();

llvm/include/llvm/Support/Signals.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,31 @@
1414
#ifndef LLVM_SUPPORT_SIGNALS_H
1515
#define LLVM_SUPPORT_SIGNALS_H
1616

17+
#include "llvm/Config/config.h"
18+
#include <array>
1719
#include <cstdint>
1820
#include <string>
1921

2022
namespace llvm {
2123
class StringRef;
2224
class raw_ostream;
2325

26+
#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
27+
// Typedefs that are convenient but only used by the StackTrace-collection code
28+
// added if DebugLoc origin-tracking is enabled.
29+
template <typename T, typename Enable> struct DenseMapInfo;
30+
template <typename ValueT, typename ValueInfoT> class DenseSet;
31+
namespace detail {
32+
template <typename KeyT, typename ValueT> struct DenseMapPair;
33+
}
34+
template <typename KeyT, typename ValueT, typename KeyInfoT, typename BucketT>
35+
class DenseMap;
36+
using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
37+
using SymbolizedAddressMap =
38+
DenseMap<void *, std::string, DenseMapInfo<void *, void>,
39+
detail::DenseMapPair<void *, std::string>>;
40+
#endif
41+
2442
namespace sys {
2543

2644
/// This function runs all the registered interrupt handlers, including the
@@ -55,6 +73,28 @@ namespace sys {
5573
/// specified, the entire frame is printed.
5674
void PrintStackTrace(raw_ostream &OS, int Depth = 0);
5775

76+
#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
77+
#ifdef NDEBUG
78+
#error DebugLoc origin-tracking should not be enabled in Release builds.
79+
#endif
80+
/// Populates the given array with a stacktrace of the current program, up to
81+
/// MaxDepth frames. Returns the number of frames returned, which will be
82+
/// inserted into \p StackTrace from index 0. All entries after the returned
83+
/// depth will be unmodified. NB: This is only intended to be used for
84+
/// introspection of LLVM by Debugify, will not be enabled in release builds,
85+
/// and should not be relied on for other purposes.
86+
template <unsigned long MaxDepth>
87+
int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
88+
89+
/// Takes a set of \p Addresses, symbolizes them and stores the result in the
90+
/// provided \p SymbolizedAddresses map.
91+
/// NB: This is only intended to be used for introspection of LLVM by
92+
/// Debugify, will not be enabled in release builds, and should not be relied
93+
/// on for other purposes.
94+
void symbolizeAddresses(AddressSet &Addresses,
95+
SymbolizedAddressMap &SymbolizedAddresses);
96+
#endif
97+
5898
// Run all registered signal handlers.
5999
void RunSignalHandlers();
60100

llvm/lib/CodeGen/BranchFolding.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -915,7 +915,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
915915
// Walk through equivalence sets looking for actual exact matches.
916916
while (MergePotentials.size() > 1) {
917917
unsigned CurHash = MergePotentials.back().getHash();
918-
const DebugLoc &BranchDL = MergePotentials.back().getBranchDebugLoc();
918+
const DebugLoc BranchDL = MergePotentials.back().getBranchDebugLoc();
919919

920920
// Build SameTails, identifying the set of blocks with this hash code
921921
// and with the maximum number of instructions in common.

llvm/lib/CodeGen/BranchFolding.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,15 @@ class TargetRegisterInfo;
5050
class MergePotentialsElt {
5151
unsigned Hash;
5252
MachineBasicBlock *Block;
53-
DebugLoc BranchDebugLoc;
53+
// We use MDNode rather than DebugLoc here because under certain CMake
54+
// options*, DebugLoc may contain a SmallVector used for introspection
55+
// purposes, which causes errors when stored here.
56+
// *LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING=COVERAGE_AND_ORIGIN
57+
MDNode *BranchDebugLoc;
5458

5559
public:
56-
MergePotentialsElt(unsigned h, MachineBasicBlock *b, DebugLoc bdl)
57-
: Hash(h), Block(b), BranchDebugLoc(std::move(bdl)) {}
60+
MergePotentialsElt(unsigned h, MachineBasicBlock *b, MDNode *bdl)
61+
: Hash(h), Block(b), BranchDebugLoc(bdl) {}
5862

5963
unsigned getHash() const { return Hash; }
6064
MachineBasicBlock *getBlock() const { return Block; }
@@ -63,7 +67,7 @@ class TargetRegisterInfo;
6367
Block = MBB;
6468
}
6569

66-
const DebugLoc &getBranchDebugLoc() { return BranchDebugLoc; }
70+
const DebugLoc getBranchDebugLoc() { return DebugLoc(BranchDebugLoc); }
6771

6872
bool operator<(const MergePotentialsElt &) const;
6973
};

llvm/lib/IR/DebugLoc.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,31 @@
99
#include "llvm/IR/DebugLoc.h"
1010
#include "llvm/Config/llvm-config.h"
1111
#include "llvm/IR/DebugInfo.h"
12+
13+
#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
14+
#include "llvm/Support/Signals.h"
15+
16+
namespace llvm {
17+
// Records one stack trace at construction time when requested; otherwise the
// trace list is left empty.
DbgLocOrigin::DbgLocOrigin(bool ShouldCollectTrace) {
  if (!ShouldCollectTrace)
    return;
  auto &Entry = StackTraces.emplace_back();
  // first: value returned by getStackTrace; second: the frame array it fills.
  Entry.first = sys::getStackTrace(Entry.second);
}
23+
void DbgLocOrigin::addTrace() {
24+
if (StackTraces.empty())
25+
return;
26+
auto &[Depth, StackTrace] = StackTraces.emplace_back();
27+
Depth = sys::getStackTrace(StackTrace);
28+
}
29+
}
30+
#endif
31+
1232
using namespace llvm;
1333

1434
#if ENABLE_DEBUGLOC_COVERAGE_TRACKING
1535
DILocAndCoverageTracking::DILocAndCoverageTracking(const DILocation *L)
16-
: TrackingMDNodeRef(const_cast<DILocation *>(L)),
36+
: TrackingMDNodeRef(const_cast<DILocation *>(L)), DbgLocOrigin(!L),
1737
Kind(DebugLocKind::Normal) {}
1838

1939
DebugLoc DebugLoc::getTemporary() { return DebugLoc(DebugLocKind::Temporary); }

llvm/lib/IR/Instruction.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,6 +1279,9 @@ void Instruction::swapProfMetadata() {
12791279

12801280
void Instruction::copyMetadata(const Instruction &SrcInst,
12811281
ArrayRef<unsigned> WL) {
1282+
if (WL.empty() || is_contained(WL, LLVMContext::MD_dbg))
1283+
setDebugLoc(SrcInst.getDebugLoc());
1284+
12821285
if (!SrcInst.hasMetadata())
12831286
return;
12841287

@@ -1292,8 +1295,6 @@ void Instruction::copyMetadata(const Instruction &SrcInst,
12921295
if (WL.empty() || WLS.count(MD.first))
12931296
setMetadata(MD.first, MD.second);
12941297
}
1295-
if (WL.empty() || WLS.count(LLVMContext::MD_dbg))
1296-
setDebugLoc(SrcInst.getDebugLoc());
12971298
}
12981299

12991300
Instruction *Instruction::clone() const {
@@ -1311,5 +1312,6 @@ Instruction *Instruction::clone() const {
13111312

13121313
New->SubclassOptionalData = SubclassOptionalData;
13131314
New->copyMetadata(*this);
1315+
New->setDebugLoc(getDebugLoc().getCopied());
13141316
return New;
13151317
}

llvm/lib/Support/Signals.cpp

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,122 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
253253
return true;
254254
}
255255

256+
#if ENABLE_DEBUGLOC_ORIGIN_TRACKING
257+
/// Symbolizes every address in \p Addresses by running llvm-symbolizer and
/// stores one formatted string per address in \p SymbolizedAddresses.
///
/// Addresses for which no module was found are stored as a bare pointer
/// value. The function returns early, possibly leaving some or all addresses
/// without an entry, if \p Addresses is empty, module lookup fails, the
/// symbolizer exits with a non-zero status, its output file cannot be read, or
/// the output ends before all addresses have been consumed.
void sys::symbolizeAddresses(AddressSet &Addresses,
                             SymbolizedAddressMap &SymbolizedAddresses) {
  assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
         "Debugify origin stacktraces require symbolization to be enabled.");

  // Convert Set of Addresses to ordered list.
  SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
  if (AddressList.empty())
    return;
  int NumAddresses = AddressList.size();
  llvm::sort(AddressList);

  // Use llvm-symbolizer tool to symbolize the stack traces. First try the
  // program named by the LLVMSymbolizerPathEnv environment variable, then fall
  // back to looking up "llvm-symbolizer" by name.
  ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
  if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
    LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
  }
  if (!LLVMSymbolizerPathOrErr)
    LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
  assert(!!LLVMSymbolizerPathOrErr &&
         "Debugify origin stacktraces require llvm-symbolizer.");
  const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;

  // Try to guess the main executable name, since we don't have argv0 available
  // here.
  std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);

  BumpPtrAllocator Allocator;
  StringSaver StrPool(Allocator);
  std::vector<const char *> Modules(NumAddresses, nullptr);
  std::vector<intptr_t> Offsets(NumAddresses, 0);
  if (!findModulesAndOffsets(AddressList.data(), NumAddresses, Modules.data(),
                             Offsets.data(), MainExecutableName.c_str(),
                             StrPool))
    return;
  int InputFD;
  SmallString<32> InputFile, OutputFile;
  sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
  sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile);
  FileRemover InputRemover(InputFile.c_str());
  FileRemover OutputRemover(OutputFile.c_str());

  // Write one "<module> <offset>" query line per address that has a module.
  // The stream is scoped so that it is destroyed before the symbolizer runs.
  {
    raw_fd_ostream Input(InputFD, true);
    for (int i = 0; i < NumAddresses; i++) {
      if (Modules[i])
        Input << Modules[i] << " " << (void *)Offsets[i] << "\n";
    }
  }

  std::optional<StringRef> Redirects[] = {InputFile.str(), OutputFile.str(),
                                          StringRef("")};
  StringRef Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining",
#ifdef _WIN32
                      // Pass --relative-address on Windows so that we don't
                      // have to add ImageBase from PE file.
                      // FIXME: Make this the default for llvm-symbolizer.
                      "--relative-address",
#endif
                      "--demangle"};
  int RunResult =
      sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
  if (RunResult != 0)
    return;

  // This report format is based on the sanitizer stack trace printer. See
  // sanitizer_stacktrace_printer.cc in compiler-rt.
  auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
  if (!OutputBuf)
    return;
  StringRef Output = OutputBuf.get()->getBuffer();
  SmallVector<StringRef, 32> Lines;
  Output.split(Lines, "\n");
  auto CurLine = Lines.begin();
  for (int i = 0; i < NumAddresses; i++) {
    assert(!SymbolizedAddresses.contains(AddressList[i]));
    std::string &SymbolizedAddr = SymbolizedAddresses[AddressList[i]];
    raw_string_ostream OS(SymbolizedAddr);
    // No query was written for addresses without a module, so no output lines
    // are consumed for them; emit only the raw pointer value.
    if (!Modules[i]) {
      OS << format_ptr(AddressList[i]) << '\n';
      continue;
    }
    // Read pairs of lines (function name and file/line info) until we
    // encounter empty line.
    for (bool IsFirst = true;; IsFirst = false) {
      if (CurLine == Lines.end())
        return;
      StringRef FunctionName = *CurLine++;
      if (FunctionName.empty())
        break;
      // Add indentation for lines after the first; we use 3 spaces, because
      // currently that aligns with the expected indentation that will be added
      // to the first line by Debugify.
      if (!IsFirst)
        OS << "   ";
      OS << format_ptr(AddressList[i]) << ' ';
      if (!FunctionName.starts_with("??"))
        OS << FunctionName << ' ';
      if (CurLine == Lines.end()) {
        OS << '\n';
        return;
      }
      StringRef FileLineInfo = *CurLine++;
      if (!FileLineInfo.starts_with("??"))
        OS << FileLineInfo;
      else
        OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
      OS << '\n';
    }
  }
}
370+
#endif
371+
256372
static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
257373

258374
LLVM_ATTRIBUTE_USED

0 commit comments

Comments
 (0)