Skip to content

Commit 045d4d8

Browse files
committed
[InstrProf] Add debuginfod correlation support
This patch adds debuginfod support to llvm-profdata: it uses the build ID recorded in a raw profile to find the associated executable, then correlates the profile using the profile data and name sections in that executable.
1 parent fa089ef commit 045d4d8

File tree

8 files changed

+153
-23
lines changed

8 files changed

+153
-23
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// REQUIRES: linux || windows
2+
// RUN: rm -rf %t
3+
4+
// Default build with no profile correlation.
5+
// RUN: %clang_profgen -o %t.default.exe -Wl,--build-id=0x12345678 -fprofile-instr-generate -fcoverage-mapping %S/Inputs/instrprof-debug-info-correlate-main.cpp %S/Inputs/instrprof-debug-info-correlate-foo.cpp
6+
// RUN: env LLVM_PROFILE_FILE=%t.default.profraw %run %t.default.exe
7+
// RUN: llvm-profdata merge -o %t.default.profdata %t.default.profraw
8+
9+
// Build with profile binary correlation and test llvm-profdata merge profile correlation with --binary-file option.
10+
// RUN: %clang_profgen -o %t.correlate.exe -Wl,--build-id=0x12345678 -fprofile-instr-generate -fcoverage-mapping -mllvm -profile-correlate=binary %S/Inputs/instrprof-debug-info-correlate-main.cpp %S/Inputs/instrprof-debug-info-correlate-foo.cpp
11+
// Strip above binary and run
12+
// RUN: llvm-strip %t.correlate.exe -o %t.stripped.exe
13+
// RUN: env LLVM_PROFILE_FILE=%t.correlate.profraw %run %t.stripped.exe
14+
// RUN: llvm-profdata merge -o %t.correlate-binary.profdata --binary-file=%t.correlate.exe %t.correlate.profraw
15+
// RUN: diff %t.default.profdata %t.correlate-binary.profdata
16+
17+
// Test llvm-profdata merge profile correlation with --debuginfod option.
18+
// RUN: mkdir -p %t/buildid/12345678
19+
// RUN: cp %t.correlate.exe %t/buildid/12345678/debuginfo
20+
// RUN: env DEBUGINFOD_CACHE_PATH=%t/debuginfod-cache DEBUGINFOD_URLS=file://%t llvm-profdata merge -o %t.correlate-debuginfod.profdata --debuginfod %t.correlate.profraw
21+
// RUN: diff %t.default.profdata %t.correlate-debuginfod.profdata
22+
23+
// Test llvm-profdata merge profile correlation with --debug-file-directory option.
24+
// RUN: mkdir -p %t/.build-id/12
25+
// RUN: cp %t.correlate.exe %t/.build-id/12/345678.debug
26+
// RUN: llvm-profdata merge -o %t.correlate-debug-file-dir.profdata --debug-file-directory %t %t.correlate.profraw
27+
// RUN: diff %t.default.profdata %t.correlate-debug-file-dir.profdata

llvm/docs/CommandGuide/llvm-profdata.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,20 @@ OPTIONS
204204
the raw profile. When ``-profile-correlate=binary`` was used for
205205
instrumentation, use this option to correlate the raw profile.
206206

207+
.. option:: --debuginfod
208+
209+
Use debuginfod to find the associated executables that contain profile data and
210+
name sections for the raw profiles to correlate them.
211+
When ``-profile-correlate=binary`` was used for instrumentation, this option can be
212+
used for correlation.
213+
214+
.. option:: --debug-file-directory=<dir>
215+
216+
Use provided local directories to search for executables that contain profile
217+
data and name sections for the raw profiles to correlate them.
218+
When ``-profile-correlate=binary`` was used for instrumentation, this option can be
219+
used for correlation.
220+
207221
.. option:: --temporal-profile-trace-reservoir-size
208222

209223
The maximum number of temporal profile traces to be stored in the output

llvm/include/llvm/ProfileData/InstrProfCorrelator.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H
1414

1515
#include "llvm/ADT/DenseSet.h"
16+
#include "llvm/Debuginfod/BuildIDFetcher.h"
17+
#include "llvm/Object/BuildID.h"
1618
#include "llvm/ProfileData/InstrProf.h"
1719
#include "llvm/Support/Error.h"
1820
#include "llvm/Support/MemoryBuffer.h"
@@ -36,7 +38,9 @@ class InstrProfCorrelator {
3638
enum ProfCorrelatorKind { NONE, DEBUG_INFO, BINARY };
3739

3840
static llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
39-
get(StringRef Filename, ProfCorrelatorKind FileKind);
41+
get(StringRef Filename, ProfCorrelatorKind FileKind,
42+
const object::BuildIDFetcher *BIDFetcher = nullptr,
43+
const std::optional<ArrayRef<llvm::object::BuildID>> BIs = std::nullopt);
4044

4145
/// Construct a ProfileData vector used to correlate raw instrumentation data
4246
/// to their functions.

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,11 +200,13 @@ class InstrProfReader {
200200
static Expected<std::unique_ptr<InstrProfReader>>
201201
create(const Twine &Path, vfs::FileSystem &FS,
202202
const InstrProfCorrelator *Correlator = nullptr,
203+
const object::BuildIDFetcher *BIDFetcher = nullptr,
203204
std::function<void(Error)> Warn = nullptr);
204205

205206
static Expected<std::unique_ptr<InstrProfReader>>
206207
create(std::unique_ptr<MemoryBuffer> Buffer,
207208
const InstrProfCorrelator *Correlator = nullptr,
209+
const object::BuildIDFetcher *BIDFetcher = nullptr,
208210
std::function<void(Error)> Warn = nullptr);
209211

210212
/// \param Weight for raw profiles use this as the temporal profile trace
@@ -314,6 +316,11 @@ class RawInstrProfReader : public InstrProfReader {
314316
/// If available, this holds the ProfileData array used to correlate raw
315317
/// instrumentation data to their functions.
316318
const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
319+
/// Correlator that fetches debuginfo from debuginfod on the fly by build id.
320+
std::unique_ptr<InstrProfCorrelator> DebugInfodCorrelator;
321+
/// Fetcher that fetches debuginfo from debuginfod to correlate profiles with
322+
/// binaries.
323+
const object::BuildIDFetcher *BIDFetcher;
317324
/// A list of timestamps paired with a function name reference.
318325
std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
319326
bool ShouldSwapBytes;
@@ -351,11 +358,13 @@ class RawInstrProfReader : public InstrProfReader {
351358
public:
352359
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
353360
const InstrProfCorrelator *Correlator,
361+
const object::BuildIDFetcher *BIDFetcher,
354362
std::function<void(Error)> Warn)
355363
: DataBuffer(std::move(DataBuffer)),
356364
Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
357365
Correlator)),
358-
Warn(Warn) {}
366+
BIDFetcher(BIDFetcher), Warn(Warn) {}
367+
359368
RawInstrProfReader(const RawInstrProfReader &) = delete;
360369
RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
361370

@@ -439,7 +448,7 @@ class RawInstrProfReader : public InstrProfReader {
439448

440449
void advanceData() {
441450
// `CountersDelta` is a constant zero when using debug info correlation.
442-
if (!Correlator) {
451+
if (!Correlator && !DebugInfodCorrelator) {
443452
// The initial CountersDelta is the in-memory address difference between
444453
// the data and counts sections:
445454
// start(__llvm_prf_cnts) - start(__llvm_prf_data)

llvm/lib/ProfileData/InstrProfCorrelator.cpp

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@ InstrProfCorrelator::Context::get(std::unique_ptr<MemoryBuffer> Buffer,
9191
}
9292

9393
llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
94-
InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
94+
InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind,
95+
const object::BuildIDFetcher *BIDFetcher,
96+
const std::optional<ArrayRef<object::BuildID>> BIs) {
9597
if (FileKind == DEBUG_INFO) {
9698
auto DsymObjectsOrErr =
9799
object::MachOObjectFile::findDsymObjectMembers(Filename);
@@ -113,11 +115,36 @@ InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
113115
return get(std::move(*BufferOrErr), FileKind);
114116
}
115117
if (FileKind == BINARY) {
116-
auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
117-
if (auto Err = BufferOrErr.takeError())
118-
return std::move(Err);
118+
if (!Filename.empty()) {
119+
auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
120+
if (auto Err = BufferOrErr.takeError())
121+
return std::move(Err);
122+
return get(std::move(*BufferOrErr), FileKind);
123+
} else if (BIDFetcher) {
124+
if (BIs->size() > 1)
125+
return make_error<InstrProfError>(
126+
instrprof_error::unable_to_correlate_profile,
127+
"unsupported profile binary correlation when there are multiple "
128+
"build IDs in a binary");
119129

120-
return get(std::move(*BufferOrErr), FileKind);
130+
std::optional<std::string> Path = BIDFetcher->fetch(BIs->front());
131+
if (Path) {
132+
auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(*Path));
133+
if (auto Err = BufferOrErr.takeError())
134+
return std::move(Err);
135+
return get(std::move(*BufferOrErr), BINARY);
136+
} else {
137+
return make_error<InstrProfError>(
138+
instrprof_error::unable_to_correlate_profile,
139+
"Missing build ID: " +
140+
llvm::toHex(BIs->front(), /*LowerCase=*/true));
141+
}
142+
} else {
143+
return make_error<InstrProfError>(
144+
instrprof_error::unable_to_correlate_profile,
145+
"unsupported profile binary correlation when provided with a file "
146+
"name and build id fetcher");
147+
}
121148
}
122149
return make_error<InstrProfError>(
123150
instrprof_error::unable_to_correlate_profile,

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,19 +153,19 @@ static void printBinaryIdsInternal(raw_ostream &OS,
153153
Expected<std::unique_ptr<InstrProfReader>>
154154
InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
155155
const InstrProfCorrelator *Correlator,
156+
const object::BuildIDFetcher *BIDFetcher,
156157
std::function<void(Error)> Warn) {
157158
// Set up the buffer to read.
158159
auto BufferOrError = setupMemoryBuffer(Path, FS);
159160
if (Error E = BufferOrError.takeError())
160161
return std::move(E);
161162
return InstrProfReader::create(std::move(BufferOrError.get()), Correlator,
162-
Warn);
163+
BIDFetcher, Warn);
163164
}
164165

165-
Expected<std::unique_ptr<InstrProfReader>>
166-
InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
167-
const InstrProfCorrelator *Correlator,
168-
std::function<void(Error)> Warn) {
166+
Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create(
167+
std::unique_ptr<MemoryBuffer> Buffer, const InstrProfCorrelator *Correlator,
168+
const object::BuildIDFetcher *BIDFetcher, std::function<void(Error)> Warn) {
169169
if (Buffer->getBufferSize() == 0)
170170
return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
171171

@@ -174,9 +174,11 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
174174
if (IndexedInstrProfReader::hasFormat(*Buffer))
175175
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
176176
else if (RawInstrProfReader64::hasFormat(*Buffer))
177-
Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn));
177+
Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator,
178+
BIDFetcher, Warn));
178179
else if (RawInstrProfReader32::hasFormat(*Buffer))
179-
Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn));
180+
Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator,
181+
BIDFetcher, Warn));
180182
else if (TextInstrProfReader::hasFormat(*Buffer))
181183
Result.reset(new TextInstrProfReader(std::move(Buffer)));
182184
else
@@ -633,6 +635,20 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
633635
if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
634636
return error(instrprof_error::bad_header);
635637

638+
if (BIDFetcher) {
639+
std::vector<object::BuildID> BinaryIDs;
640+
if (Error E = readBinaryIds(BinaryIDs))
641+
return E;
642+
if (auto E = InstrProfCorrelator::get(
643+
"", InstrProfCorrelator::ProfCorrelatorKind::BINARY,
644+
BIDFetcher, BinaryIDs)
645+
.moveInto(DebugInfodCorrelator)) {
646+
return E;
647+
}
648+
if (auto Err = DebugInfodCorrelator->correlateProfileData(0))
649+
return Err;
650+
}
651+
636652
if (Correlator) {
637653
// These sizes in the raw file are zero because we constructed them in the
638654
// Correlator.
@@ -643,6 +659,14 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
643659
DataEnd = Data + Correlator->getDataSize();
644660
NamesStart = Correlator->getNamesPointer();
645661
NamesEnd = NamesStart + Correlator->getNamesSize();
662+
} else if (DebugInfodCorrelator) {
663+
InstrProfCorrelatorImpl<IntPtrT> *DebugInfodCorrelatorImpl =
664+
dyn_cast_or_null<InstrProfCorrelatorImpl<IntPtrT>>(
665+
DebugInfodCorrelator.get());
666+
Data = DebugInfodCorrelatorImpl->getDataPointer();
667+
DataEnd = Data + DebugInfodCorrelatorImpl->getDataSize();
668+
NamesStart = DebugInfodCorrelatorImpl->getNamesPointer();
669+
NamesEnd = NamesStart + DebugInfodCorrelatorImpl->getNamesSize();
646670
} else {
647671
Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
648672
Start + DataOffset);

llvm/tools/llvm-profdata/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,7 @@ add_llvm_tool(llvm-profdata
1212
intrinsics_gen
1313
GENERATE_DRIVER
1414
)
15+
16+
if(NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
17+
target_link_libraries(llvm-profdata PRIVATE LLVMDebuginfod)
18+
endif()

llvm/tools/llvm-profdata/llvm-profdata.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/ADT/SmallSet.h"
1414
#include "llvm/ADT/SmallVector.h"
1515
#include "llvm/ADT/StringRef.h"
16+
#include "llvm/Debuginfod/HTTPClient.h"
1617
#include "llvm/IR/LLVMContext.h"
1718
#include "llvm/Object/Binary.h"
1819
#include "llvm/ProfileData/InstrProfCorrelator.h"
@@ -130,6 +131,12 @@ cl::opt<std::string>
130131
cl::desc("For merge, use the provided unstripped bianry to "
131132
"correlate the raw profile."),
132133
cl::sub(MergeSubcommand));
134+
cl::list<std::string> DebugFileDirectory(
135+
"debug-file-directory",
136+
cl::desc("Directories to search for object files by build ID"));
137+
cl::opt<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden,
138+
cl::sub(MergeSubcommand),
139+
cl::desc("Enable debuginfod"));
133140
cl::opt<std::string> FuncNameFilter(
134141
"function",
135142
cl::desc("Only functions matching the filter are shown in the output. For "
@@ -652,7 +659,8 @@ static void overlapInput(const std::string &BaseFilename,
652659
/// Load an input into a writer context.
653660
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
654661
const InstrProfCorrelator *Correlator,
655-
const StringRef ProfiledBinary, WriterContext *WC) {
662+
const StringRef ProfiledBinary, WriterContext *WC,
663+
const object::BuildIDFetcher *BIDFetcher = nullptr) {
656664
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
657665

658666
// Copy the filename, because llvm::ThreadPool copied the input "const
@@ -730,8 +738,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
730738
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
731739
ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
732740
};
733-
auto ReaderOrErr =
734-
InstrProfReader::create(Input.Filename, *FS, Correlator, Warn);
741+
auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator,
742+
BIDFetcher, Warn);
735743
if (Error E = ReaderOrErr.takeError()) {
736744
// Skip the empty profiles by returning silently.
737745
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
@@ -914,9 +922,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
914922
exitWithError("unknown format is specified");
915923

916924
// TODO: Maybe we should support correlation with mixture of different
917-
// correlation modes(w/wo debug-info/object correlation).
918-
if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
919-
exitWithError("Expected only one of -debug-info, -binary-file");
925+
// correlation modes(w/wo debug-info/object correlation).
926+
if (DebugInfoFilename.empty()) {
927+
if (!BinaryFilename.empty() && DebugInfod)
928+
exitWithError("Expected only one of -binary-file, -debuginfod");
929+
} else if (!BinaryFilename.empty() || DebugInfod) {
930+
exitWithError(
931+
"Expected only one of -debug-info, -binary-file, -debuginfod");
932+
}
920933
std::string CorrelateFilename;
921934
ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
922935
if (!DebugInfoFilename.empty()) {
@@ -936,6 +949,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
936949
exitWithError(std::move(Err), CorrelateFilename);
937950
}
938951

952+
std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
953+
if (DebugInfod) {
954+
llvm::HTTPClient::initialize();
955+
BIDFetcher = std::make_unique<DebuginfodFetcher>(DebugFileDirectory);
956+
} else if (!DebugFileDirectory.empty()) {
957+
BIDFetcher = std::make_unique<object::BuildIDFetcher>(DebugFileDirectory);
958+
}
959+
939960
std::mutex ErrorLock;
940961
SmallSet<instrprof_error, 4> WriterErrorCodes;
941962

@@ -954,15 +975,15 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
954975
if (NumThreads == 1) {
955976
for (const auto &Input : Inputs)
956977
loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
957-
Contexts[0].get());
978+
Contexts[0].get(), BIDFetcher.get());
958979
} else {
959980
DefaultThreadPool Pool(hardware_concurrency(NumThreads));
960981

961982
// Load the inputs in parallel (N/NumThreads serial steps).
962983
unsigned Ctx = 0;
963984
for (const auto &Input : Inputs) {
964985
Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
965-
Contexts[Ctx].get());
986+
Contexts[Ctx].get(), BIDFetcher.get());
966987
Ctx = (Ctx + 1) % NumThreads;
967988
}
968989
Pool.wait();

0 commit comments

Comments
 (0)