Skip to content

[BOLT] Support instrumentation hook via DT_FINI_ARRAY #67348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,15 @@ class BinaryContext {
/// the execution of the binary is completed.
std::optional<uint64_t> FiniFunctionAddress;

/// DT_FINI.
std::optional<uint64_t> FiniAddress;

/// DT_FINI_ARRAY. Only used when DT_FINI is not set.
std::optional<uint64_t> FiniArrayAddress;

/// DT_FINI_ARRAYSZ. Only used when DT_FINI is not set.
std::optional<uint64_t> FiniArraySize;

/// Page alignment used for code layout.
uint64_t PageAlign{HugePageSize};

Expand Down
20 changes: 18 additions & 2 deletions bolt/include/bolt/Core/BinarySection.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,12 @@ class BinarySection {
/// Add a dynamic relocation at the given /p Offset.
void addDynamicRelocation(uint64_t Offset, MCSymbol *Symbol, uint64_t Type,
uint64_t Addend, uint64_t Value = 0) {
assert(Offset < getSize() && "offset not within section bounds");
DynamicRelocations.emplace(Relocation{Offset, Symbol, Type, Addend, Value});
addDynamicRelocation(Relocation{Offset, Symbol, Type, Addend, Value});
}

void addDynamicRelocation(const Relocation &Reloc) {
assert(Reloc.Offset < getSize() && "offset not within section bounds");
DynamicRelocations.emplace(Reloc);
}

/// Add relocation against the original contents of this section.
Expand Down Expand Up @@ -410,6 +414,18 @@ class BinarySection {
return Itr != DynamicRelocations.end() ? &*Itr : nullptr;
}

std::optional<Relocation> takeDynamicRelocationAt(uint64_t Offset) {
Relocation Key{Offset, 0, 0, 0, 0};
auto Itr = DynamicRelocations.find(Key);

if (Itr == DynamicRelocations.end())
return std::nullopt;

Relocation Reloc = *Itr;
DynamicRelocations.erase(Itr);
return Reloc;
}

uint64_t hash(const BinaryData &BD) const {
std::map<const BinaryData *, uint64_t> Cache;
return hash(BD, Cache);
Expand Down
9 changes: 9 additions & 0 deletions bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@ class RewriteInstance {
/// from meta data in the file.
void discoverFileObjects();

/// Check whether we should use DT_FINI or DT_FINI_ARRAY for instrumentation.
/// DT_FINI is preferred; DT_FINI_ARRAY is only used when no DT_FINI entry was
/// found.
Error discoverRtFiniAddress();

/// If DT_FINI_ARRAY is used for instrumentation, update the relocation of its
/// first entry to point to the instrumentation library's fini address.
void updateRtFiniReloc();

/// Create and initialize metadata rewriters for this instance.
void initializeMetadataManager();

Expand Down
2 changes: 2 additions & 0 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,9 @@ static uint64_t encodeValueAArch64(uint64_t Type, uint64_t Value, uint64_t PC) {
switch (Type) {
default:
llvm_unreachable("unsupported relocation");
case ELF::R_AARCH64_ABS16:
case ELF::R_AARCH64_ABS32:
case ELF::R_AARCH64_ABS64:
break;
case ELF::R_AARCH64_PREL16:
case ELF::R_AARCH64_PREL32:
Expand Down
86 changes: 85 additions & 1 deletion bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,10 @@ Error RewriteInstance::run() {
adjustCommandLineOptions();
discoverFileObjects();

if (opts::Instrument && !BC->IsStaticExecutable)
if (Error E = discoverRtFiniAddress())
return E;

preprocessProfileData();

// Skip disassembling if we have a translation table and we are running an
Expand Down Expand Up @@ -740,6 +744,9 @@ Error RewriteInstance::run() {

updateMetadata();

if (opts::Instrument && !BC->IsStaticExecutable)
updateRtFiniReloc();

if (opts::LinuxKernelMode) {
errs() << "BOLT-WARNING: not writing the output file for Linux Kernel\n";
return Error::success();
Expand Down Expand Up @@ -1280,6 +1287,77 @@ void RewriteInstance::discoverFileObjects() {
registerFragments();
}

Error RewriteInstance::discoverRtFiniAddress() {
// Use DT_FINI if it's available.
if (BC->FiniAddress) {
BC->FiniFunctionAddress = BC->FiniAddress;
return Error::success();
}

if (!BC->FiniArrayAddress || !BC->FiniArraySize) {
return createStringError(
std::errc::not_supported,
"Instrumentation needs either DT_FINI or DT_FINI_ARRAY");
}

if (*BC->FiniArraySize < BC->AsmInfo->getCodePointerSize()) {
return createStringError(std::errc::not_supported,
"Need at least 1 DT_FINI_ARRAY slot");
}

ErrorOr<BinarySection &> FiniArraySection =
BC->getSectionForAddress(*BC->FiniArrayAddress);
if (auto EC = FiniArraySection.getError())
return errorCodeToError(EC);

if (const Relocation *Reloc = FiniArraySection->getDynamicRelocationAt(0)) {
BC->FiniFunctionAddress = Reloc->Addend;
return Error::success();
}

if (const Relocation *Reloc = FiniArraySection->getRelocationAt(0)) {
BC->FiniFunctionAddress = Reloc->Value;
return Error::success();
}

return createStringError(std::errc::not_supported,
"No relocation for first DT_FINI_ARRAY slot");
}

void RewriteInstance::updateRtFiniReloc() {
// Updating DT_FINI is handled by patchELFDynamic.
if (BC->FiniAddress)
return;

const RuntimeLibrary *RT = BC->getRuntimeLibrary();
if (!RT || !RT->getRuntimeFiniAddress())
return;

assert(BC->FiniArrayAddress && BC->FiniArraySize &&
"inconsistent .fini_array state");

ErrorOr<BinarySection &> FiniArraySection =
BC->getSectionForAddress(*BC->FiniArrayAddress);
assert(FiniArraySection && ".fini_array removed");

if (std::optional<Relocation> Reloc =
FiniArraySection->takeDynamicRelocationAt(0)) {
assert(Reloc->Addend == BC->FiniFunctionAddress &&
"inconsistent .fini_array dynamic relocation");
Reloc->Addend = RT->getRuntimeFiniAddress();
FiniArraySection->addDynamicRelocation(*Reloc);
}

// Update the static relocation by adding a pending relocation which will get
// patched when flushPendingRelocations is called in rewriteFile. Note that
// flushPendingRelocations will calculate the value to patch as
// "Symbol + Addend". Since we don't have a symbol, just set the addend to the
// desired value.
FiniArraySection->addPendingRelocation(Relocation{
/*Offset*/ 0, /*Symbol*/ nullptr, /*Type*/ Relocation::getAbs64(),
/*Addend*/ RT->getRuntimeFiniAddress(), /*Value*/ 0});
}

void RewriteInstance::registerFragments() {
if (!BC->HasSplitFunctions)
return;
Expand Down Expand Up @@ -5135,7 +5213,13 @@ Error RewriteInstance::readELFDynamic(ELFObjectFile<ELFT> *File) {
}
break;
case ELF::DT_FINI:
BC->FiniFunctionAddress = Dyn.getPtr();
BC->FiniAddress = Dyn.getPtr();
break;
case ELF::DT_FINI_ARRAY:
BC->FiniArrayAddress = Dyn.getPtr();
break;
case ELF::DT_FINI_ARRAYSZ:
BC->FiniArraySize = Dyn.getPtr();
break;
case ELF::DT_RELA:
DynamicRelocationsAddress = Dyn.getPtr();
Expand Down
7 changes: 0 additions & 7 deletions bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,6 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
exit(1);
}

if (!BC.FiniFunctionAddress && !BC.IsStaticExecutable) {
errs() << "BOLT-ERROR: input binary lacks DT_FINI entry in the dynamic "
"section but instrumentation currently relies on patching "
"DT_FINI to write the profile\n";
exit(1);
}

if ((opts::InstrumentationWaitForks || opts::InstrumentationSleepTime) &&
opts::InstrumentationFileAppendPID) {
errs()
Expand Down
103 changes: 103 additions & 0 deletions bolt/test/AArch64/hook-fini.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
## Test the different ways of hooking the fini function for instrumentation (via
## DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE
## binaries because of the different ways of handling relocations (static or
## dynamic).
## All tests perform the following steps:
## - Compile and link for the case to be tested
## - Some sanity-checks on the dynamic section and relocations in the binary to
## verify it has the shape we want for testing:
## - DT_FINI or DT_FINI_ARRAY in dynamic section
## - No relative relocations for non-PIE
## - Instrument
## - Verify generated binary
# REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}

# RUN: %clang %cflags -pie %s -Wl,-q -o %t.exe
# RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
# RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
# RUN: llvm-bolt %t.exe -o %t --instrument
# RUN: llvm-readelf -drs %t | FileCheck --check-prefix=CHECK-FINI %s

# RUN: %clang %cflags -pie %s -Wl,-q,-fini=0 -o %t-no-fini.exe
# RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
# RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
# RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument
# RUN: llvm-readelf -drs %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI %s
# RUN: llvm-readelf -ds -x .fini_array %t-no-fini | FileCheck --check-prefix=CHECK-NO-FINI-RELOC %s

## Create a dummy shared library to link against to force creation of the dynamic section.
# RUN: %clang %cflags %p/../Inputs/stub.c -fPIC -shared -o %t-stubs.so
# RUN: %clang %cflags %s -no-pie -Wl,-q,-fini=0 %t-stub.so -o %t-no-pie-no-fini.exe
# RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
# RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument
# RUN: llvm-readelf -ds -x .fini_array %t-no-pie-no-fini | FileCheck --check-prefix=CHECK-NO-PIE-NO-FINI %s

## With fini: dynamic section should contain DT_FINI
# DYN-FINI: (FINI)

## Without fini: dynamic section should only contain DT_FINI_ARRAY
# DYN-NO-FINI-NOT: (FINI)
# DYN-NO-FINI: (FINI_ARRAY)
# DYN-NO-FINI: (FINI_ARRAYSZ)

## With PIE: binary should have relative relocations
# RELOC-PIE: R_AARCH64_RELATIVE

## Without PIE: binary should not have relative relocations
# RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE

## Check that DT_FINI is set to __bolt_runtime_fini
# CHECK-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-FINI-DAG: (FINI) 0x[[FINI:[[:xdigit:]]+]]
# CHECK-FINI-DAG: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
## Check that the dynamic relocation at .fini_array was not patched
# CHECK-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
# CHECK-FINI-NOT: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI]]
# CHECK-FINI: Symbol table '.symtab' contains {{.*}} entries:
# CHECK-FINI: {{0+}}[[FINI]] {{.*}} __bolt_runtime_fini

## Check that DT_FINI_ARRAY has a dynamic relocation for __bolt_runtime_fini
# CHECK-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-NO-FINI-NOT: (FINI)
# CHECK-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
# CHECK-NO-FINI: Relocation section '.rela.dyn' at offset {{.*}} contains {{.*}} entries
# CHECK-NO-FINI: {{0+}}[[FINI_ARRAY]] {{.*}} R_AARCH64_RELATIVE [[FINI_ADDR:[[:xdigit:]]+]]
# CHECK-NO-FINI: Symbol table '.symtab' contains {{.*}} entries:
# CHECK-NO-FINI: {{0+}}[[FINI_ADDR]] {{.*}} __bolt_runtime_fini

## Check that the static relocation in .fini_array is patched even for PIE
# CHECK-NO-FINI-RELOC: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-NO-FINI-RELOC: (FINI_ARRAY) 0x[[FINI_ARRAY:[[:xdigit:]]+]]
# CHECK-NO-FINI-RELOC: Symbol table '.symtab' contains {{.*}} entries:
## Read bytes separately so we can reverse them later
# CHECK-NO-FINI-RELOC: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini
# CHECK-NO-FINI-RELOC: Hex dump of section '.fini_array':
# CHECK-NO-FINI-RELOC: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000

## Check that DT_FINI_ARRAY has static relocation applied for __bolt_runtime_fini
# CHECK-NO-PIE-NO-FINI: Dynamic section at offset {{.*}} contains {{.*}} entries:
# CHECK-NO-PIE-NO-FINI-NOT: (FINI)
# CHECK-NO-PIE-NO-FINI: (FINI_ARRAY) 0x[[FINI_ARRAY:[a-f0-9]+]]
# CHECK-NO-PIE-NO-FINI: Symbol table '.symtab' contains {{.*}} entries:
## Read address bytes separately so we can reverse them later
# CHECK-NO-PIE-NO-FINI: {{0+}}[[FINI_ADDR_B0:[[:xdigit:]]{2}]][[FINI_ADDR_B1:[[:xdigit:]]{2}]][[FINI_ADDR_B2:[[:xdigit:]]{2}]][[FINI_ADDR_B3:[[:xdigit:]]{2}]] {{.*}} __bolt_runtime_fini
# CHECK-NO-PIE-NO-FINI: Hex dump of section '.fini_array':
# CHECK-NO-PIE-NO-FINI: 0x{{0+}}[[FINI_ARRAY]] [[FINI_ADDR_B3]][[FINI_ADDR_B2]][[FINI_ADDR_B1]][[FINI_ADDR_B0]] 00000000

.globl _start
.type _start, %function
_start:
# Dummy relocation to force relocation mode.
.reloc 0, R_AARCH64_NONE
ret
.size _start, .-_start

.globl _fini
.type _fini, %function
_fini:
ret
.size _fini, .-_fini

.section .fini_array,"aw"
.align 3
.dword _fini
61 changes: 61 additions & 0 deletions bolt/test/runtime/AArch64/hook-fini.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Test the different ways of hooking the fini function for instrumentation (via
# DT_FINI and via DT_FINI_ARRAY). We test the latter for both PIE and non-PIE
# binaries because of the different ways of handling relocations (static or
# dynamic).
# All tests perform the following steps:
# - Compile and link for the case to be tested
# - Some sanity-checks on the dynamic section and relocations in the binary to
# verify it has the shape we want for testing:
# - DT_FINI or DT_FINI_ARRAY in dynamic section
# - No relative relocations for non-PIE
# - Instrument
# - Run instrumented binary
# - Verify generated profile
REQUIRES: system-linux,bolt-runtime

RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
RUN: llvm-readelf -d %t.exe | FileCheck --check-prefix=DYN-FINI %s
RUN: llvm-readelf -r %t.exe | FileCheck --check-prefix=RELOC-PIE %s
RUN: llvm-bolt %t.exe -o %t --instrument \
RUN: --instrumentation-file=%t \
RUN: --instrumentation-file-append-pid
RUN: rm -f %t.*.fdata
RUN: %t
RUN: cat %t.*.fdata | FileCheck %s

RUN: %clang %cflags -pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-fini.exe
RUN: llvm-readelf -d %t-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
RUN: llvm-readelf -r %t-no-fini.exe | FileCheck --check-prefix=RELOC-PIE %s
RUN: llvm-bolt %t-no-fini.exe -o %t-no-fini --instrument \
RUN: --instrumentation-file=%t-no-fini \
RUN: --instrumentation-file-append-pid
RUN: rm -f %t-no-fini.*.fdata
RUN: %t-no-fini
RUN: cat %t-no-fini.*.fdata | FileCheck %s

RUN: %clang %cflags -no-pie %p/Inputs/basic-instrumentation.s -Wl,-q,-fini=0 -o %t-no-pie-no-fini.exe
RUN: llvm-readelf -d %t-no-pie-no-fini.exe | FileCheck --check-prefix=DYN-NO-FINI %s
RUN: llvm-readelf -r %t-no-pie-no-fini.exe | FileCheck --check-prefix=RELOC-NO-PIE %s
RUN: llvm-bolt %t-no-pie-no-fini.exe -o %t-no-pie-no-fini --instrument \
RUN: --instrumentation-file=%t-no-pie-no-fini \
RUN: --instrumentation-file-append-pid
RUN: rm -f %t-no-pie-no-fini.*.fdata
RUN: %t-no-pie-no-fini
RUN: cat %t-no-pie-no-fini.*.fdata | FileCheck %s

# With fini: dynamic section should contain DT_FINI
DYN-FINI: (FINI)

# Without fini: dynamic section should only contain DT_FINI_ARRAY
DYN-NO-FINI-NOT: (FINI)
DYN-NO-FINI: (FINI_ARRAY)
DYN-NO-FINI: (FINI_ARRAYSZ)

# With PIE: binary should have relative relocations
RELOC-PIE: R_AARCH64_RELATIVE

# Without PIE: binary should not have relative relocations
RELOC-NO-PIE-NOT: R_AARCH64_RELATIVE

# The instrumented profile should at least say main was called once
CHECK: main 0 0 1{{$}}