Skip to content

Commit 5147e59

Browse files
authored
[GSYM] Callsites: Add data format support and loading from YAML (#109781)
This PR adds support in the GSYM format for call site information and adds support for loading call sites from a YAML file. The YAML input support is mostly for testing purposes, so that we have a way to test the functionality. Note that this data is not currently used in the GSYM tooling; the logic to use call sites will be added in a later PR. The reason we need call site information in GSYM files is to support better call stack function disambiguation in the case where multiple functions have been merged due to optimization (linker ICF). When resolving a merged function on the call stack, we can use the call site information of the calling function to narrow down the actual function that is being called, from the set of all merged functions. See [this RFC](https://discourse.llvm.org/t/rfc-extending-gsym-format-with-call-site-information-for-merged-function-disambiguation/80682) for more details on this change.
1 parent 06514c5 commit 5147e59

File tree

15 files changed

+2358
-7
lines changed

15 files changed

+2358
-7
lines changed
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
//===- CallSiteInfo.h -------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
10+
#define LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
11+
12+
#include "llvm/ADT/BitmaskEnum.h"
13+
#include "llvm/ADT/StringRef.h"
14+
#include "llvm/ADT/StringSet.h"
15+
#include "llvm/Support/Error.h"
16+
#include <vector>
17+
18+
namespace llvm {
19+
class DataExtractor;
20+
class raw_ostream;
21+
22+
namespace yaml {
23+
struct FunctionsYAML;
24+
} // namespace yaml
25+
26+
namespace gsym {
27+
class FileWriter;
28+
class GsymCreator;
29+
struct FunctionInfo;
30+
/// Describes one call site inside a function: where the call returns to, which
/// function-name regex patterns are associated with it, and a set of flags.
/// Instances can be decoded from and encoded to the binary GSYM representation.
struct CallSiteInfo {
31+
/// Bit flags that can be combined (bitwise OR) and stored in the `Flags`
/// member below.
enum Flags : uint8_t {
32+
// No flags set.
None = 0,
33+
// This flag specifies that the call site can only call a function within
34+
// the same link unit as the call site.
35+
InternalCall = 1 << 0,
36+
// This flag specifies that the call site can only call a function outside
37+
// the link unit that the call site is in.
38+
ExternalCall = 1 << 1,
39+
40+
// Marks this enum as a bitmask enum (see llvm/ADT/BitmaskEnum.h);
// ExternalCall is passed as the largest individual flag value.
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ ExternalCall),
41+
};
42+
43+
/// The return offset of the call site - relative to the function start.
44+
uint64_t ReturnOffset = 0;
45+
46+
/// Offsets into the string table of function-name regex patterns.
47+
std::vector<uint32_t> MatchRegex;
48+
49+
/// Bitwise OR of CallSiteInfo::Flags values. Defaults to Flags::None.
50+
uint8_t Flags = CallSiteInfo::Flags::None;
51+
52+
/// Decode a CallSiteInfo object from a binary data stream.
53+
///
54+
/// \param Data The binary stream to read the data from.
55+
/// \param Offset The current offset within the data stream. Passed by
/// non-const reference; presumably advanced past the decoded bytes (the
/// definition is not in this header).
56+
/// \returns A CallSiteInfo or an error describing the issue.
57+
static llvm::Expected<CallSiteInfo> decode(DataExtractor &Data,
58+
uint64_t &Offset);
59+
60+
/// Encode this CallSiteInfo object into a FileWriter stream.
61+
///
62+
/// \param O The binary stream to write the data to.
63+
/// \returns An error object that indicates success or failure.
64+
llvm::Error encode(FileWriter &O) const;
65+
};
66+
67+
/// A list of CallSiteInfo objects that is decoded from, and encoded to, the
/// binary representation as a single unit.
struct CallSiteInfoCollection {
68+
/// The call sites held by this collection.
std::vector<CallSiteInfo> CallSites;
69+
70+
/// Decode a CallSiteInfoCollection object from a binary data stream.
71+
///
72+
/// \param Data The binary stream to read the data from.
73+
/// \returns A CallSiteInfoCollection or an error describing the issue.
74+
static llvm::Expected<CallSiteInfoCollection> decode(DataExtractor &Data);
75+
76+
/// Encode this CallSiteInfoCollection object into a FileWriter stream.
77+
///
78+
/// \param O The binary stream to write the data to.
79+
/// \returns An error object that indicates success or failure.
80+
llvm::Error encode(FileWriter &O) const;
81+
};
82+
83+
/// Loads call site information from a YAML file and records it on the
/// FunctionInfo objects of a GsymCreator.
class CallSiteInfoLoader {
84+
public:
85+
/// Constructor that initializes the CallSiteInfoLoader with necessary data
86+
/// structures.
87+
///
/// Both arguments are stored as references, so they must outlive this
/// loader.
///
88+
/// \param GCreator A reference to the GsymCreator.
/// \param Funcs A reference to the vector of FunctionInfo objects to be
/// populated.
89+
CallSiteInfoLoader(GsymCreator &GCreator, std::vector<FunctionInfo> &Funcs)
90+
: GCreator(GCreator), Funcs(Funcs) {}
91+
92+
/// This method reads the specified YAML file, parses its content, and updates
93+
/// the `Funcs` vector with call site information based on the YAML data.
94+
///
95+
/// The `Funcs` vector is the one supplied to the constructor; it is not a
96+
/// parameter of this method.
97+
/// \param YAMLFile A StringRef representing the path to the YAML
98+
/// file to be loaded.
99+
/// \returns An `llvm::Error` indicating success or describing any issues
100+
/// encountered during the loading process.
101+
llvm::Error loadYAML(StringRef YAMLFile);
102+
103+
private:
104+
/// Builds a map from function names to FunctionInfo pointers based on the
105+
/// provided `Funcs` vector.
106+
///
107+
/// Takes no arguments; `Funcs` presumably refers to the member vector.
108+
/// \returns A StringMap mapping function names (StringRef) to their
109+
/// corresponding FunctionInfo pointers.
110+
StringMap<FunctionInfo *> buildFunctionMap();
111+
112+
/// Processes the parsed YAML functions and updates the `FuncMap` accordingly.
113+
///
114+
/// \param FuncYAMLs A constant reference to an llvm::yaml::FunctionsYAML
115+
/// object containing parsed YAML data.
116+
/// \param FuncMap A reference to a StringMap mapping function names to
117+
/// FunctionInfo pointers.
118+
/// \returns An `llvm::Error` indicating success or describing any issues
119+
/// encountered during processing.
120+
llvm::Error processYAMLFunctions(const llvm::yaml::FunctionsYAML &FuncYAMLs,
121+
StringMap<FunctionInfo *> &FuncMap);
122+
123+
/// Reference to the parent Gsym Creator object.
124+
GsymCreator &GCreator;
125+
126+
/// Reference to the vector of FunctionInfo objects to be populated.
127+
std::vector<FunctionInfo> &Funcs;
128+
};
129+
130+
raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfo &CSI);
131+
raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfoCollection &CSIC);
132+
133+
} // namespace gsym
134+
} // namespace llvm
135+
136+
#endif // LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H

llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
1111

1212
#include "llvm/ADT/SmallString.h"
13+
#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
1314
#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
1415
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
1516
#include "llvm/DebugInfo/GSYM/LineTable.h"
@@ -63,7 +64,9 @@ class GsymReader;
6364
/// enum InfoType {
6465
/// EndOfList = 0u,
6566
/// LineTableInfo = 1u,
66-
/// InlineInfo = 2u
67+
/// InlineInfo = 2u,
68+
/// MergedFunctionsInfo = 3u,
69+
/// CallSiteInfo = 4u
6770
/// };
6871
///
6972
/// This stream of tuples is terminated by an "InfoType" whose value is
@@ -73,7 +76,7 @@ class GsymReader;
7376
/// clients to still parse the format and skip over any data that they don't
7477
/// understand or want to parse.
7578
///
76-
/// So the function information encoding essientially looks like:
79+
/// So the function information encoding essentially looks like:
7780
///
7881
/// struct {
7982
/// uint32_t Size;
@@ -92,6 +95,7 @@ struct FunctionInfo {
9295
std::optional<LineTable> OptLineTable;
9396
std::optional<InlineInfo> Inline;
9497
std::optional<MergedFunctionsInfo> MergedFunctions;
98+
std::optional<CallSiteInfoCollection> CallSites;
9599
/// If we encode a FunctionInfo during segmenting so we know its size, we can
96100
/// cache that encoding here so we don't need to re-encode it when saving the
97101
/// GSYM file.
@@ -107,7 +111,7 @@ struct FunctionInfo {
107111
/// debug info, we might end up with multiple FunctionInfo objects for the
108112
/// same range and we need to be able to tell which one is the better object
109113
/// to use.
110-
bool hasRichInfo() const { return OptLineTable || Inline; }
114+
bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
111115

112116
/// Query if a FunctionInfo object is valid.
113117
///

llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,16 @@ class GsymCreator {
329329
/// \returns The unique 32 bit offset into the string table.
330330
uint32_t insertString(StringRef S, bool Copy = true);
331331

332+
/// Retrieve a string from the GSYM string table given its offset.
333+
///
334+
/// The offset is assumed to be a valid offset into the string table;
335+
/// otherwise an assert will be triggered.
336+
///
337+
/// \param Offset The offset of the string to retrieve, previously returned by
338+
/// insertString.
339+
/// \returns The string at the given offset in the string table.
340+
StringRef getString(uint32_t Offset);
341+
332342
/// Insert a file into this GSYM creator.
333343
///
334344
/// Inserts a file by adding a FileEntry into the "Files" member variable if
@@ -352,13 +362,22 @@ class GsymCreator {
352362
/// \param FI The function info object to emplace into our functions list.
353363
void addFunctionInfo(FunctionInfo &&FI);
354364

365+
/// Load call site information from a YAML file.
366+
///
367+
/// This function reads call site information from a specified YAML file and
368+
/// adds it to the GSYM data.
369+
///
370+
/// \param YAMLFile The path to the YAML file containing call site
371+
/// information.
372+
llvm::Error loadCallSitesFromYAML(StringRef YAMLFile);
373+
355374
/// Organize merged FunctionInfo's
356375
///
357376
/// This method processes the list of function infos (Funcs) to identify and
358377
/// group functions with overlapping address ranges.
359378
///
360379
/// \param Out Output stream to report information about how merged
361-
/// FunctionInfo's were handeled.
380+
/// FunctionInfo's were handled.
362381
void prepareMergedFunctions(OutputAggregator &Out);
363382

364383
/// Finalize the data in the GSYM creator prior to saving the data out.

llvm/include/llvm/DebugInfo/GSYM/GsymReader.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,26 @@ class GsymReader {
181181
/// \param MFI The object to dump.
182182
void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
183183

184+
/// Dump a CallSiteInfo object.
185+
///
186+
/// This function will output the details of a CallSiteInfo object in a
187+
/// human-readable format.
188+
///
189+
/// \param OS The output stream to dump to.
190+
///
191+
/// \param CSI The CallSiteInfo object to dump.
192+
void dump(raw_ostream &OS, const CallSiteInfo &CSI);
193+
194+
/// Dump a CallSiteInfoCollection object.
195+
///
196+
/// This function will iterate over a collection of CallSiteInfo objects and
197+
/// dump each one.
198+
///
199+
/// \param OS The output stream to dump to.
200+
///
201+
/// \param CSIC The CallSiteInfoCollection object to dump.
202+
void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC);
203+
184204
/// Dump a LineTable object.
185205
///
186206
/// This function will convert any string table indexes and file indexes

llvm/lib/DebugInfo/GSYM/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
88
InlineInfo.cpp
99
LineTable.cpp
1010
LookupResult.cpp
11+
CallSiteInfo.cpp
1112
MergedFunctionsInfo.cpp
1213
ObjectFileTransformer.cpp
1314
ExtractRanges.cpp

0 commit comments

Comments
 (0)