Skip to content

Commit cb5dc1f

Browse files
authored
[gSYM] Add support for merged functions in gSYM format (llvm#101604)
This patch introduces support for storing debug info for merged functions in the GSYM debug info. It allows GSYM to represent multiple functions that share the same address range, which occurs when multiple functions are merged during linker ICF. The core of this functionality is the new `MergedFunctionsInfo` class, which is integrated into the existing `FunctionInfo` structure. During GSYM creation, functions with identical address ranges are now grouped together, with one function serving as the "master" and the others becoming "merged" functions. This organization is preserved in the GSYM format and can be read back and displayed when dumping GSYM information. Old readers will only see the master function, and the other "merged" functions will not be processed. Note: This patch just adds the functionality to the gSYM format - additional changes to the gsym format and algorithmic changes to the logic of existing tooling are needed to take advantage of this data. Exact output of `llvm-gsymutil --verify --verbose` for the included test: [gist](https://gist.github.com/alx32/b9c104d7f87c0b3e7b4171399fc2dca3)
1 parent b3b6f7c commit cb5dc1f

File tree

12 files changed

+1014
-12
lines changed

12 files changed

+1014
-12
lines changed

llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
1515
#include "llvm/DebugInfo/GSYM/LineTable.h"
1616
#include "llvm/DebugInfo/GSYM/LookupResult.h"
17+
#include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
1718
#include "llvm/DebugInfo/GSYM/StringTable.h"
1819
#include <cstdint>
1920

@@ -90,6 +91,7 @@ struct FunctionInfo {
9091
uint32_t Name; ///< String table offset in the string table.
9192
std::optional<LineTable> OptLineTable;
9293
std::optional<InlineInfo> Inline;
94+
std::optional<MergedFunctionsInfo> MergedFunctions;
9395
/// If we encode a FunctionInfo during segmenting so we know its size, we can
9496
/// cache that encoding here so we don't need to re-encode it when saving the
9597
/// GSYM file.
@@ -140,9 +142,16 @@ struct FunctionInfo {
140142
/// \param O The binary stream to write the data to at the current file
141143
/// position.
142144
///
145+
/// \param NoPadding Directly write the FunctionInfo data, without any padding
146+
/// By default, FunctionInfo will be 4-byte aligned by padding with
147+
/// 0's at the start. This is OK since the function will return the offset of
148+
/// actual data in the stream. However when writing FunctionInfo's as a
149+
/// stream, the padding will break the decoding of the data - since the offset
150+
/// where the FunctionInfo starts is not kept in this scenario.
151+
///
143152
/// \returns An error object that indicates failure or the offset of the
144153
/// function info that was successfully written into the stream.
145-
llvm::Expected<uint64_t> encode(FileWriter &O) const;
154+
llvm::Expected<uint64_t> encode(FileWriter &O, bool NoPadding = false) const;
146155

147156
/// Encode this function info into the internal byte cache and return the size
148157
/// in bytes.

llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,15 @@ class GsymCreator {
352352
/// \param FI The function info object to emplace into our functions list.
353353
void addFunctionInfo(FunctionInfo &&FI);
354354

355+
/// Organize merged FunctionInfo's
356+
///
357+
/// This method processes the list of function infos (Funcs) to identify and
358+
/// group functions with identical address ranges.
359+
///
360+
/// \param Out Output stream to report information about how merged
361+
/// FunctionInfo's were handled.
362+
void prepareMergedFunctions(OutputAggregator &Out);
363+
355364
/// Finalize the data in the GSYM creator prior to saving the data out.
356365
///
357366
/// Finalize must be called after all FunctionInfo objects have been added

llvm/include/llvm/DebugInfo/GSYM/GsymReader.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,20 @@ class GsymReader {
166166
/// \param OS The output stream to dump to.
167167
///
168168
/// \param FI The object to dump.
169-
void dump(raw_ostream &OS, const FunctionInfo &FI);
169+
///
170+
/// \param Indent The indentation as number of spaces. Used when dumping as an
171+
/// item within MergedFunctionsInfo.
172+
void dump(raw_ostream &OS, const FunctionInfo &FI, uint32_t Indent = 0);
173+
174+
/// Dump a MergedFunctionsInfo object.
175+
///
176+
/// This function will dump a MergedFunctionsInfo object - basically by
177+
/// dumping the contained FunctionInfo objects with indentation.
178+
///
179+
/// \param OS The output stream to dump to.
180+
///
181+
/// \param MFI The object to dump.
182+
void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
170183

171184
/// Dump a LineTable object.
172185
///
@@ -177,7 +190,10 @@ class GsymReader {
177190
/// \param OS The output stream to dump to.
178191
///
179192
/// \param LT The object to dump.
180-
void dump(raw_ostream &OS, const LineTable &LT);
193+
///
194+
/// \param Indent The indentation as number of spaces. Used when dumping as an
195+
/// item from within MergedFunctionsInfo.
196+
void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
181197

182198
/// Dump a InlineInfo object.
183199
///
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
//===- MergedFunctionsInfo.h ------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_DEBUGINFO_GSYM_MERGEDFUNCTIONSINFO_H
10+
#define LLVM_DEBUGINFO_GSYM_MERGEDFUNCTIONSINFO_H
11+
12+
#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
13+
#include "llvm/Support/Error.h"
14+
#include <stdint.h>
15+
#include <vector>
16+
17+
namespace llvm {
18+
class raw_ostream;
19+
20+
namespace gsym {
21+
22+
class GsymReader;
23+
struct FunctionInfo;
24+
struct MergedFunctionsInfo {
25+
std::vector<FunctionInfo> MergedFunctions;
26+
27+
void clear() { MergedFunctions.clear(); }
28+
29+
/// Query if a MergedFunctionsInfo object is valid.
30+
///
31+
/// \returns A boolean indicating if this FunctionInfo is valid.
32+
bool isValid() { return !MergedFunctions.empty(); }
33+
34+
/// Decode an MergedFunctionsInfo object from a binary data stream.
35+
///
36+
/// \param Data The binary stream to read the data from. This object must have
37+
/// the data for the MergedFunctionsInfo object starting at offset zero. The
38+
/// data can contain more data than needed.
39+
///
40+
/// \param BaseAddr The base address to use when encoding all address ranges.
41+
///
42+
/// \returns An MergedFunctionsInfo or an error describing the issue that was
43+
/// encountered during decoding.
44+
static llvm::Expected<MergedFunctionsInfo> decode(DataExtractor &Data,
45+
uint64_t BaseAddr);
46+
47+
/// Encode this MergedFunctionsInfo object into FileWriter stream.
48+
///
49+
/// \param O The binary stream to write the data to at the current file
50+
/// position.
51+
/// \returns An error object that indicates success or failure for the
52+
/// encoding process.
53+
llvm::Error encode(FileWriter &O) const;
54+
};
55+
56+
bool operator==(const MergedFunctionsInfo &LHS, const MergedFunctionsInfo &RHS);
57+
58+
} // namespace gsym
59+
} // namespace llvm
60+
61+
#endif // LLVM_DEBUGINFO_GSYM_MERGEDFUNCTIONSINFO_H

llvm/lib/DebugInfo/GSYM/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
88
InlineInfo.cpp
99
LineTable.cpp
1010
LookupResult.cpp
11+
MergedFunctionsInfo.cpp
1112
ObjectFileTransformer.cpp
1213
ExtractRanges.cpp
1314

llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ using namespace gsym;
2222
enum InfoType : uint32_t {
2323
EndOfList = 0u,
2424
LineTableInfo = 1u,
25-
InlineInfo = 2u
25+
InlineInfo = 2u,
26+
MergedFunctionsInfo = 3u,
2627
};
2728

2829
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
@@ -86,6 +87,14 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
8687
return II.takeError();
8788
break;
8889

90+
case InfoType::MergedFunctionsInfo:
91+
if (Expected<MergedFunctionsInfo> MI =
92+
MergedFunctionsInfo::decode(InfoData, BaseAddr))
93+
FI.MergedFunctions = std::move(MI.get());
94+
else
95+
return MI.takeError();
96+
break;
97+
8998
default:
9099
return createStringError(std::errc::io_error,
91100
"0x%8.8" PRIx64 ": unsupported InfoType %u",
@@ -111,12 +120,14 @@ uint64_t FunctionInfo::cacheEncoding() {
111120
return EncodingCache.size();
112121
}
113122

114-
llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
123+
llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
124+
bool NoPadding) const {
115125
if (!isValid())
116126
return createStringError(std::errc::invalid_argument,
117127
"attempted to encode invalid FunctionInfo object");
118-
// Align FunctionInfo data to a 4 byte alignment.
119-
Out.alignTo(4);
128+
// Align FunctionInfo data to a 4 byte alignment, if padding is allowed
129+
if (NoPadding == false)
130+
Out.alignTo(4);
120131
const uint64_t FuncInfoOffset = Out.tell();
121132
// Check if we have already encoded this function info into EncodingCache.
122133
// This will be non empty when creating segmented GSYM files as we need to
@@ -170,13 +181,31 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
170181
Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
171182
}
172183

184+
// Write out the merged functions info if we have any and if it is valid.
185+
if (MergedFunctions) {
186+
Out.writeU32(InfoType::MergedFunctionsInfo);
187+
// Write a uint32_t length as zero for now, we will fix this up after
188+
// writing the MergedFunctionsInfo out with the number of bytes that were written.
189+
Out.writeU32(0);
190+
const auto StartOffset = Out.tell();
191+
llvm::Error err = MergedFunctions->encode(Out);
192+
if (err)
193+
return std::move(err);
194+
const auto Length = Out.tell() - StartOffset;
195+
if (Length > UINT32_MAX)
196+
return createStringError(
197+
std::errc::invalid_argument,
198+
"MergedFunctionsInfo length is greater than UINT32_MAX");
199+
// Fixup the size of the MergedFunctionsInfo data with the correct size.
200+
Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
201+
}
202+
173203
// Terminate the data chunks with and end of list with zero size
174204
Out.writeU32(InfoType::EndOfList);
175205
Out.writeU32(0);
176206
return FuncInfoOffset;
177207
}
178208

179-
180209
llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
181210
const GsymReader &GR,
182211
uint64_t FuncAddr,

llvm/lib/DebugInfo/GSYM/GsymCreator.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,49 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
189189
return ErrorSuccess();
190190
}
191191

192+
/// Group FunctionInfo objects that share an identical address range.
///
/// Funcs is sorted, then walked once. The first function seen for a given
/// range stays a top-level entry; each following function with the same range
/// is moved into that entry's MergedFunctions list. A function equal to the
/// most recently merged one is dropped as a duplicate. On return Funcs holds
/// only the top-level entries.
///
/// \param Out Output stream that receives a summary line when the number of
/// top-level functions is smaller than the original number of functions.
void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
  // Nothing to do if we have less than 2 functions.
  if (Funcs.size() < 2)
    return;

  // Sorting places functions with the same address range next to each other.
  llvm::sort(Funcs);
  std::vector<FunctionInfo> TopLevelFuncs;

  // The first function always starts the first top-level group.
  TopLevelFuncs.emplace_back(std::move(Funcs.front()));

  for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
    FunctionInfo &TopFunc = TopLevelFuncs.back();
    FunctionInfo &MatchFunc = Funcs[Idx];

    // A different range starts a new top-level group.
    if (!(TopFunc.Range == MatchFunc.Range)) {
      TopLevelFuncs.emplace_back(std::move(MatchFunc));
      continue;
    }

    // Same range: MatchFunc becomes a child of the current top-level function.
    if (!TopFunc.MergedFunctions)
      TopFunc.MergedFunctions = MergedFunctionsInfo();
    std::vector<FunctionInfo> &Merged =
        TopFunc.MergedFunctions->MergedFunctions;

    // Avoid adding duplicate functions to MergedFunctions. Since functions are
    // already ordered within the Funcs array, checking against the last merged
    // function is enough. The emptiness check keeps back() well defined even
    // if a FunctionInfo arrived with an empty MergedFunctions list.
    if (!Merged.empty() && Merged.back() == MatchFunc)
      continue;
    Merged.emplace_back(std::move(MatchFunc));
  }

  // Every function that did not stay top-level was merged (or dropped as a
  // duplicate); report the count if there were any.
  const uint32_t MergedCount = Funcs.size() - TopLevelFuncs.size();
  if (MergedCount != 0)
    Out << "Have " << MergedCount
        << " merged functions as children of other functions\n";

  std::swap(Funcs, TopLevelFuncs);
}
234+
192235
llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
193236
std::lock_guard<std::mutex> Guard(Mutex);
194237
if (Finalized)

llvm/lib/DebugInfo/GSYM/GsymReader.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,17 +398,33 @@ void GsymReader::dump(raw_ostream &OS) {
398398
}
399399
}
400400

401-
void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) {
401+
void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
402+
uint32_t Indent) {
403+
OS.indent(Indent);
402404
OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
403405
if (FI.OptLineTable)
404-
dump(OS, *FI.OptLineTable);
406+
dump(OS, *FI.OptLineTable, Indent);
405407
if (FI.Inline)
406-
dump(OS, *FI.Inline);
408+
dump(OS, *FI.Inline, Indent);
409+
410+
if (FI.MergedFunctions) {
411+
assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
412+
dump(OS, *FI.MergedFunctions);
413+
}
414+
}
415+
416+
/// Dump every FunctionInfo contained in \a MFI. Each entry is introduced by a
/// header line carrying its index and is then dumped with an indentation of
/// four spaces.
void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
  constexpr uint32_t NestedIndent = 4;
  uint32_t Idx = 0;
  for (const FunctionInfo &MergedFI : MFI.MergedFunctions) {
    OS << "++ Merged FunctionInfos[" << Idx << "]:\n";
    dump(OS, MergedFI, NestedIndent);
    ++Idx;
  }
}
408422

409-
void GsymReader::dump(raw_ostream &OS, const LineTable &LT) {
423+
void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
424+
OS.indent(Indent);
410425
OS << "LineTable:\n";
411426
for (auto &LE: LT) {
427+
OS.indent(Indent);
412428
OS << " " << HEX64(LE.Addr) << ' ';
413429
if (LE.File)
414430
dump(OS, getFile(LE.File));
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//===- MergedFunctionsInfo.cpp ----------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
10+
#include "llvm/DebugInfo/GSYM/FileWriter.h"
11+
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
12+
#include "llvm/Support/DataExtractor.h"
13+
14+
using namespace llvm;
15+
using namespace gsym;
16+
17+
/// Encode this MergedFunctionsInfo into \a Out.
///
/// The layout written is a 32-bit function count followed, for each merged
/// function, by a 32-bit byte length and the unpadded FunctionInfo encoding.
///
/// \returns Error::success() on success, or an error if the function count or
/// an encoded FunctionInfo does not fit in 32 bits, or if encoding a contained
/// FunctionInfo fails.
llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
  // The count is stored as a uint32_t, so refuse to silently truncate it.
  if (MergedFunctions.size() > UINT32_MAX)
    return createStringError(
        std::errc::invalid_argument,
        "MergedFunctionsInfo function count is greater than UINT32_MAX");
  Out.writeU32(static_cast<uint32_t>(MergedFunctions.size()));

  for (const FunctionInfo &F : MergedFunctions) {
    // Write a uint32_t length as zero for now, it is fixed up below once the
    // number of bytes written for this FunctionInfo is known.
    Out.writeU32(0);
    const uint64_t StartOffset = Out.tell();
    // Encode the FunctionInfo with no padding so later we can just read them
    // one after the other without knowing the offset in the stream for each.
    llvm::Expected<uint64_t> Result = F.encode(Out, /*NoPadding=*/true);
    if (!Result)
      return Result.takeError();
    const uint64_t Length = Out.tell() - StartOffset;
    if (Length > UINT32_MAX)
      return createStringError(
          std::errc::invalid_argument,
          "merged FunctionInfo length is greater than UINT32_MAX");
    // Patch the length slot, which sits in the 4 bytes before StartOffset.
    Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
  }
  return Error::success();
}
32+
33+
/// Decode a MergedFunctionsInfo from \a Data, starting at offset zero.
///
/// The expected layout is a 32-bit function count followed, for each function,
/// by a 32-bit byte length and that many bytes of FunctionInfo encoding.
///
/// \param Data The binary stream to read from.
///
/// \param BaseAddr The base address handed to FunctionInfo::decode for every
/// contained function.
///
/// \returns The decoded MergedFunctionsInfo, or an error if the data is
/// truncated or a contained FunctionInfo fails to decode.
llvm::Expected<MergedFunctionsInfo>
MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
  MergedFunctionsInfo MFI;
  uint64_t Offset = 0;

  if (!Data.isValidOffsetForDataOfSize(Offset, 4))
    return createStringError(
        std::errc::io_error,
        "unable to read the function count of MergedFunctionsInfo");
  const uint32_t Count = Data.getU32(&Offset);

  for (uint32_t I = 0; I < Count; ++I) {
    if (!Data.isValidOffsetForDataOfSize(Offset, 4))
      return createStringError(
          std::errc::io_error,
          "unable to read the size of a merged FunctionInfo");
    const uint32_t FnSize = Data.getU32(&Offset);

    // Reject a size that runs past the end of the available data instead of
    // decoding a silently truncated slice.
    if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
      return createStringError(
          std::errc::io_error,
          "merged FunctionInfo data extends past the end of the data");

    DataExtractor FnData(Data.getData().substr(Offset, FnSize),
                         Data.isLittleEndian(), Data.getAddressSize());
    // Merged functions share the address range of the function that contains
    // them, so BaseAddr is passed through unchanged; Offset is a position in
    // the encoded bytes, not part of the address.
    // NOTE(review): assumes FunctionInfo::decode treats BaseAddr as the
    // function start address - confirm against FunctionInfo::decode.
    llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FnData, BaseAddr);
    if (!FI)
      return FI.takeError();
    MFI.MergedFunctions.push_back(std::move(*FI));
    Offset += FnSize;
  }

  return MFI;
}
53+
54+
bool operator==(const MergedFunctionsInfo &LHS,
55+
const MergedFunctionsInfo &RHS) {
56+
return LHS.MergedFunctions == RHS.MergedFunctions;
57+
}

0 commit comments

Comments
 (0)