Skip to content

Reapply [BOLT] DataAggregator support for binaries with multiple text segments #118023

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ class DataAggregator : public DataReader {
std::string BuildIDBinaryName;

/// Memory map info for a single file as recorded in perf.data
/// When a binary has multiple text segments, the Size is computed as the
/// difference of the last address of these segments from the BaseAddress.
/// The base addresses of all text segments must be the same.
struct MMapInfo {
uint64_t BaseAddress{0}; /// Base address of the mapped binary.
uint64_t MMapAddress{0}; /// Address of the executable segment.
Expand Down Expand Up @@ -493,6 +496,11 @@ class DataAggregator : public DataReader {
/// and return a file name matching a given \p FileBuildID.
std::optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);

/// Get a constant reference to the parsed binary mmap entries.
const std::unordered_map<uint64_t, MMapInfo> &getBinaryMMapInfo() {
return BinaryMMapInfo;
}

friend class YAMLProfileWriter;
};
} // namespace bolt
Expand Down
43 changes: 29 additions & 14 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ cl::opt<bool> ReadPreAggregated(
"pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
cl::cat(AggregatorCategory));

cl::opt<std::string>
ReadPerfEvents("perf-script-events",
cl::desc("skip perf event collection by supplying a "
"perf-script output in a textual format"),
cl::ReallyHidden, cl::init(""), cl::cat(AggregatorCategory));

static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
Expand Down Expand Up @@ -167,8 +173,9 @@ void DataAggregator::findPerfExecutable() {
void DataAggregator::start() {
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";

// Don't launch perf for pre-aggregated files
if (opts::ReadPreAggregated)
// Don't launch perf for pre-aggregated files or when perf input is specified
// by the user.
if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
return;

findPerfExecutable();
Expand Down Expand Up @@ -464,6 +471,13 @@ void DataAggregator::filterBinaryMMapInfo() {

int DataAggregator::prepareToParse(StringRef Name, PerfProcessInfo &Process,
PerfProcessErrorCallbackTy Callback) {
if (!opts::ReadPerfEvents.empty()) {
outs() << "PERF2BOLT: using pre-processed perf events for '" << Name
<< "' (perf-script-events)\n";
ParsingBuf = opts::ReadPerfEvents;
return 0;
}

std::string Error;
outs() << "PERF2BOLT: waiting for perf " << Name
<< " collection to finish...\n";
Expand Down Expand Up @@ -2056,15 +2070,6 @@ std::error_code DataAggregator::parseMMapEvents() {
if (FileMMapInfo.first == "(deleted)")
continue;

// Consider only the first mapping of the file for any given PID
auto Range = GlobalMMapInfo.equal_range(FileMMapInfo.first);
bool PIDExists = llvm::any_of(make_range(Range), [&](const auto &MI) {
return MI.second.PID == FileMMapInfo.second.PID;
});

if (PIDExists)
continue;

GlobalMMapInfo.insert(FileMMapInfo);
}

Expand Down Expand Up @@ -2116,12 +2121,22 @@ std::error_code DataAggregator::parseMMapEvents() {
<< " using file offset 0x" << Twine::utohexstr(MMapInfo.Offset)
<< ". Ignoring profile data for this mapping\n";
continue;
} else {
MMapInfo.BaseAddress = *BaseAddress;
}
MMapInfo.BaseAddress = *BaseAddress;
}

BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo));
// Try to add MMapInfo to the map and update its size. Large binaries may
// span to multiple text segments, so the mapping is inserted only on the
// first occurrence.
if (!BinaryMMapInfo.insert(std::make_pair(MMapInfo.PID, MMapInfo)).second)
assert(MMapInfo.BaseAddress == BinaryMMapInfo[MMapInfo.PID].BaseAddress &&
"Base address on multiple segment mappings should match");

// Update mapping size.
const uint64_t EndAddress = MMapInfo.MMapAddress + MMapInfo.Size;
const uint64_t Size = EndAddress - BinaryMMapInfo[MMapInfo.PID].BaseAddress;
if (Size > BinaryMMapInfo[MMapInfo.PID].Size)
BinaryMMapInfo[MMapInfo.PID].Size = Size;
}

if (BinaryMMapInfo.empty()) {
Expand Down
3 changes: 3 additions & 0 deletions bolt/unittests/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
add_bolt_unittest(CoreTests
BinaryContext.cpp
MCPlusBuilder.cpp
MemoryMaps.cpp
DynoStats.cpp

DISABLE_LLVM_LINK_LLVM_DYLIB
Expand All @@ -17,6 +18,8 @@ target_link_libraries(CoreTests
PRIVATE
LLVMBOLTCore
LLVMBOLTRewrite
LLVMBOLTProfile
LLVMTestingSupport
)

foreach (tgt ${BOLT_TARGETS_TO_BUILD})
Expand Down
142 changes: 142 additions & 0 deletions bolt/unittests/Core/MemoryMaps.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
//===- bolt/unittest/Core/MemoryMaps.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "bolt/Core/BinaryContext.h"
#include "bolt/Profile/DataAggregator.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::ELF;
using namespace bolt;

namespace opts {
extern cl::opt<std::string> ReadPerfEvents;
} // namespace opts

namespace {

/// Perform checks on memory map events normally captured in perf. Tests use
/// the 'opts::ReadPerfEvents' flag to emulate these events, passing a custom
/// 'perf script' output to DataAggregator.
struct MemoryMapsTester : public testing::TestWithParam<Triple::ArchType> {
void SetUp() override {
initalizeLLVM();
prepareElf();
initializeBOLT();
}

protected:
void initalizeLLVM() {
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
llvm::InitializeAllTargets();
llvm::InitializeAllAsmPrinters();
}

void prepareElf() {
memcpy(ElfBuf, "\177ELF", 4);
ELF64LE::Ehdr *EHdr = reinterpret_cast<typename ELF64LE::Ehdr *>(ElfBuf);
EHdr->e_ident[llvm::ELF::EI_CLASS] = llvm::ELF::ELFCLASS64;
EHdr->e_ident[llvm::ELF::EI_DATA] = llvm::ELF::ELFDATA2LSB;
EHdr->e_machine = GetParam() == Triple::aarch64 ? EM_AARCH64 : EM_X86_64;
MemoryBufferRef Source(StringRef(ElfBuf, sizeof(ElfBuf)), "ELF");
ObjFile = cantFail(ObjectFile::createObjectFile(Source));
}

void initializeBOLT() {
Relocation::Arch = ObjFile->makeTriple().getArch();
BC = cantFail(BinaryContext::createBinaryContext(
ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
ASSERT_FALSE(!BC);
}

char ElfBuf[sizeof(typename ELF64LE::Ehdr)] = {};
std::unique_ptr<ObjectFile> ObjFile;
std::unique_ptr<BinaryContext> BC;
};
} // namespace

#ifdef X86_AVAILABLE

INSTANTIATE_TEST_SUITE_P(X86, MemoryMapsTester,
::testing::Values(Triple::x86_64));

#endif

#ifdef AARCH64_AVAILABLE

INSTANTIATE_TEST_SUITE_P(AArch64, MemoryMapsTester,
::testing::Values(Triple::aarch64));

#endif

/// Check that the correct mmap size is computed when we have multiple text
/// segment mappings.
TEST_P(MemoryMapsTester, ParseMultipleSegments) {
const int Pid = 1234;
StringRef Filename = "BINARY";
opts::ReadPerfEvents = formatv(
"name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
"[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n"
"name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
"[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n",
Pid, Filename);

BC->SegmentMapInfo[0x11da000] =
SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true};
BC->SegmentMapInfo[0x31d0000] =
SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0000, 0x3000000, 0x200000, true};

DataAggregator DA("");
BC->setFilename(Filename);
Error Err = DA.preprocessProfile(*BC);

// Ignore errors from perf2bolt when parsing memory events later on.
ASSERT_THAT_ERROR(std::move(Err), Succeeded());

auto &BinaryMMapInfo = DA.getBinaryMMapInfo();
auto El = BinaryMMapInfo.find(Pid);
// Check that memory mapping is present and has the expected size.
ASSERT_NE(El, BinaryMMapInfo.end());
ASSERT_EQ(El->second.Size, static_cast<uint64_t>(0xb1d0000));
}

/// Check that DataAggregator aborts when pre-processing an input binary
/// with multiple text segments that have different base addresses.
TEST_P(MemoryMapsTester, MultipleSegmentsMismatchedBaseAddress) {
const int Pid = 1234;
StringRef Filename = "BINARY";
opts::ReadPerfEvents = formatv(
"name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
"[0xabc0000000(0x1000000) @ 0x11c0000 103:01 1573523 0]: r-xp {1}\n"
"name 0 [000] 0.000000: PERF_RECORD_MMAP2 {0}/{0}: "
"[0xabc2000000(0x8000000) @ 0x31d0000 103:01 1573523 0]: r-xp {1}\n",
Pid, Filename);

BC->SegmentMapInfo[0x11da000] =
SegmentInfo{0x11da000, 0x10da000, 0x11ca000, 0x10da000, 0x10000, true};
// Using '0x31d0fff' FileOffset which triggers a different base address
// for this second text segment.
BC->SegmentMapInfo[0x31d0000] =
SegmentInfo{0x31d0000, 0x51ac82c, 0x31d0fff, 0x3000000, 0x200000, true};

DataAggregator DA("");
BC->setFilename(Filename);
ASSERT_DEBUG_DEATH(
{ Error Err = DA.preprocessProfile(*BC); },
"Base address on multiple segment mappings should match");
}
1 change: 1 addition & 0 deletions llvm/utils/gn/secondary/bolt/unittests/Core/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ unittest("CoreTests") {
"BinaryContext.cpp",
"DynoStats.cpp",
"MCPlusBuilder.cpp",
"MemoryMaps.cpp",
]

defines = []
Expand Down