Skip to content

Commit 254c13d

Browse files
authored
[BOLT][AArch64] Patch functions targeted by optional relocs (#138750)
On AArch64, we create optional/weak relocations that may not be processed due to the relocated value overflow. When the overflow happens, we used to enforce patching for all functions in the binary via --force-patch option. This PR relaxes the requirement, and enforces patching only for functions that are target of optional relocations. Moreover, if the compact code model is used, the relocation overflow is guaranteed not to happen and the patching will be skipped.
1 parent b836f96 commit 254c13d

File tree

9 files changed

+74
-58
lines changed

9 files changed

+74
-58
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,11 @@ class BinaryFunction {
360360
/// True if the function is used for patching code at a fixed address.
361361
bool IsPatch{false};
362362

363+
/// True if the original entry point of the function may get called, but the
364+
/// original body cannot be executed and needs to be patched with code that
365+
/// redirects execution to the new function body.
366+
bool NeedsPatch{false};
367+
363368
/// True if the function should not have an associated symbol table entry.
364369
bool IsAnonymous{false};
365370

@@ -1372,6 +1377,9 @@ class BinaryFunction {
13721377
/// Return true if this function is used for patching existing code.
13731378
bool isPatch() const { return IsPatch; }
13741379

1380+
/// Return true if the function requires a patch.
1381+
bool needsPatch() const { return NeedsPatch; }
1382+
13751383
/// Return true if the function should not have associated symbol table entry.
13761384
bool isAnonymous() const { return IsAnonymous; }
13771385

@@ -1757,6 +1765,9 @@ class BinaryFunction {
17571765
IsPatch = V;
17581766
}
17591767

1768+
/// Mark the function for patching.
1769+
void setNeedsPatch(bool V) { NeedsPatch = V; }
1770+
17601771
/// Indicate if the function should have a name in the symbol table.
17611772
void setAnonymous(bool V) {
17621773
assert(isInjected() && "Only injected functions could be anonymous");

bolt/include/bolt/Utils/CommandLineOpts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ extern llvm::cl::opt<unsigned> AlignText;
3434
extern llvm::cl::opt<unsigned> AlignFunctions;
3535
extern llvm::cl::opt<bool> AggregateOnly;
3636
extern llvm::cl::opt<unsigned> BucketsPerLine;
37+
extern llvm::cl::opt<bool> CompactCodeModel;
3738
extern llvm::cl::opt<bool> DiffOnly;
3839
extern llvm::cl::opt<bool> EnableBAT;
3940
extern llvm::cl::opt<bool> EqualizeBBCounts;

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,8 +1797,6 @@ bool BinaryFunction::scanExternalRefs() {
17971797
// Create relocation for every fixup.
17981798
for (const MCFixup &Fixup : Fixups) {
17991799
std::optional<Relocation> Rel = BC.MIB->createRelocation(Fixup, *BC.MAB);
1800-
// Can be skipped in case of overlow during relocation value encoding.
1801-
Rel->setOptional();
18021800
if (!Rel) {
18031801
Success = false;
18041802
continue;
@@ -1814,6 +1812,17 @@ bool BinaryFunction::scanExternalRefs() {
18141812
Success = false;
18151813
continue;
18161814
}
1815+
1816+
if (BC.isAArch64()) {
1817+
// Allow the relocation to be skipped in case of the overflow during the
1818+
// relocation value encoding.
1819+
Rel->setOptional();
1820+
1821+
if (!opts::CompactCodeModel)
1822+
if (BinaryFunction *TargetBF = BC.getFunctionForSymbol(Rel->Symbol))
1823+
TargetBF->setNeedsPatch(true);
1824+
}
1825+
18171826
Rel->Offset += getAddress() - getOriginSection()->getAddress() + Offset;
18181827
FunctionRelocations.push_back(*Rel);
18191828
}

bolt/lib/Core/BinarySection.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -186,16 +186,6 @@ void BinarySection::flushPendingRelocations(raw_pwrite_stream &OS,
186186
!Relocation::canEncodeValue(Reloc.Type, Value,
187187
SectionAddress + Reloc.Offset)) {
188188

189-
// A successful run of 'scanExternalRefs' means that all pending
190-
// relocations are flushed. Otherwise, PatchEntries should run.
191-
if (!opts::ForcePatch) {
192-
BC.errs()
193-
<< "BOLT-ERROR: cannot encode relocation for symbol "
194-
<< Reloc.Symbol->getName()
195-
<< " as it is out-of-range. To proceed must use -force-patch\n";
196-
exit(1);
197-
}
198-
199189
++SkippedPendingRelocations;
200190
continue;
201191
}

bolt/lib/Passes/LongJmp.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "bolt/Passes/LongJmp.h"
1414
#include "bolt/Core/ParallelUtilities.h"
15+
#include "bolt/Utils/CommandLineOpts.h"
1516
#include "llvm/Support/MathExtras.h"
1617

1718
#define DEBUG_TYPE "longjmp"
@@ -26,11 +27,6 @@ extern cl::opt<unsigned> AlignFunctions;
2627
extern cl::opt<bool> UseOldText;
2728
extern cl::opt<bool> HotFunctionsAtEnd;
2829

29-
static cl::opt<bool>
30-
CompactCodeModel("compact-code-model",
31-
cl::desc("generate code for binaries <128MB on AArch64"),
32-
cl::init(false), cl::cat(BoltCategory));
33-
3430
static cl::opt<bool> GroupStubs("group-stubs",
3531
cl::desc("share stubs across functions"),
3632
cl::init(true), cl::cat(BoltOptCategory));

bolt/lib/Passes/PatchEntries.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
3939
bool NeedsPatching = llvm::any_of(
4040
llvm::make_second_range(BC.getBinaryFunctions()),
4141
[&](BinaryFunction &BF) {
42-
return !BC.shouldEmit(BF) && !BF.hasExternalRefRelocations();
42+
return (!BC.shouldEmit(BF) && !BF.hasExternalRefRelocations()) ||
43+
BF.needsPatch();
4344
});
4445

4546
if (!NeedsPatching)
@@ -66,7 +67,7 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) {
6667

6768
// Check if we can skip patching the function.
6869
if (!opts::ForcePatch && !Function.hasEHRanges() &&
69-
Function.getSize() < PatchThreshold)
70+
!Function.needsPatch() && Function.getSize() < PatchThreshold)
7071
continue;
7172

7273
// List of patches for function entries. We either successfully patch

bolt/lib/Utils/CommandLineOpts.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ cl::opt<unsigned>
6161
cl::desc("number of entries per line (default 256)"),
6262
cl::init(256), cl::Optional, cl::cat(HeatmapCategory));
6363

64+
cl::opt<bool>
65+
CompactCodeModel("compact-code-model",
66+
cl::desc("generate code for binaries <128MB on AArch64"),
67+
cl::init(false), cl::cat(BoltCategory));
68+
6469
cl::opt<bool>
6570
DiffOnly("diff-only",
6671
cl::desc("stop processing once we have enough to compare two binaries"),

bolt/test/AArch64/lite-mode.s

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,34 @@
22
## non-optimized code.
33

44
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
6+
# RUN: --defsym COMPACT=1 %s -o %t.compact.o
57
# RUN: link_fdata %s %t.o %t.fdata
6-
# RUN: llvm-strip --strip-unneeded %t.o
8+
# RUN: llvm-strip --strip-unneeded %t*.o
79
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static
10+
# RUN: %clang %cflags %t.compact.o -o %t.compact.exe -Wl,-q -static
811
# RUN: llvm-bolt %t.exe -o %t.bolt --data %t.fdata --lite
12+
# RUN: llvm-bolt %t.compact.exe -o %t.compact.bolt --data %t.fdata --lite \
13+
# RUN: --compact-code-model
914
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.exe \
1015
# RUN: | FileCheck %s --check-prefix=CHECK-INPUT
1116
# RUN: llvm-objdump -d --disassemble-symbols=cold_function %t.bolt \
1217
# RUN: | FileCheck %s
18+
# RUN: llvm-objdump -d --disassemble-symbols=_start.org.0 %t.bolt \
19+
# RUN: | FileCheck %s --check-prefix=CHECK-PATCH
20+
# RUN: llvm-objdump -d %t.compact.bolt \
21+
# RUN: | FileCheck %s --check-prefix=CHECK-COMPACT
1322

23+
## In compact mode, make sure we do not create an unnecessary patch thunk.
24+
# CHECK-COMPACT-NOT: <_start.org.0>
1425

1526
## Verify that the number of FDEs matches the number of functions in the output
1627
## binary. There are three original functions and two optimized.
28+
## NOTE: at the moment we are emitting extra FDEs for patched functions, thus
29+
## there is one more FDE for _start.
1730
# RUN: llvm-readelf -u %t.bolt | grep -wc FDE \
1831
# RUN: | FileCheck --check-prefix=CHECK-FDE %s
19-
# CHECK-FDE: 5
32+
# CHECK-FDE: 6
2033

2134
## In lite mode, optimized code will be separated from the original .text by
2235
## over 128MB, making it impossible for call/bl instructions in cold functions
@@ -28,15 +41,22 @@
2841
_start:
2942
# FDATA: 0 [unknown] 0 1 _start 0 0 100
3043
.cfi_startproc
44+
45+
## Check that the code at the original location is converted into a
46+
## veneer/thunk.
47+
# CHECK-PATCH-LABEL: <_start.org.0>
48+
# CHECK-PATCH-NEXT: adrp x16
49+
# CHECK-PATCH-NEXT: add x16, x16,
50+
# CHECK-PATCH-NEXT: br x16
3151
cmp x0, 1
3252
b.eq .L0
3353
bl cold_function
3454
.L0:
3555
ret x30
3656
.cfi_endproc
37-
.size _start, .-_start
57+
.size _start, .-_start
3858

39-
## Cold non-optimized function with a reference to a hot function (_start).
59+
## Cold non-optimized function with references to hot functions.
4060
# CHECK: Disassembly of section .bolt.org.text:
4161
# CHECK-LABEL: <cold_function>
4262
.globl cold_function
@@ -97,12 +117,26 @@ cold_function:
97117
# CHECK-NEXT: nop
98118
# CHECK-NEXT: ldr x5
99119

120+
## Since _start is relocated further than 128MB from the call site, we check
121+
## that the call is converted into a call to its original version. That original
122+
## version should contain a veneer/thunk code that we check separately.
123+
bl _start
124+
# CHECK-INPUT-NEXT: bl {{.*}} <_start>
125+
# CHECK-NEXT: bl {{.*}} <_start.org.0>
126+
127+
## Same as above, but the instruction is a tail call.
128+
b _start
129+
# CHECK-INPUT-NEXT: b {{.*}} <_start>
130+
# CHECK-NEXT: b {{.*}} <_start.org.0>
131+
100132
.cfi_endproc
101-
.size cold_function, .-cold_function
133+
.size cold_function, .-cold_function
102134

103-
## Reserve 1MB of space to make functions that follow unreachable by ADRs in
135+
.ifndef COMPACT
136+
## Reserve 128MB of space to make functions that follow unreachable by ADRs in
104137
## code that precedes this gap.
105-
.space 0x100000
138+
.space 0x8000000
139+
.endif
106140

107141
.globl far_func
108142
.type far_func, %function
@@ -111,5 +145,4 @@ far_func:
111145
.cfi_startproc
112146
ret x30
113147
.cfi_endproc
114-
.size far_func, .-far_func
115-
148+
.size far_func, .-far_func

bolt/unittests/Core/BinaryContext.cpp

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -162,36 +162,6 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) {
162162
<< "Wrong forward branch value\n";
163163
}
164164

165-
TEST_P(BinaryContextTester,
166-
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOff) {
167-
if (GetParam() != Triple::aarch64)
168-
GTEST_SKIP();
169-
170-
// Tests that flushPendingRelocations exits if any pending relocation is out
171-
// of range and PatchEntries hasn't run. Pending relocations are added by
172-
// scanExternalRefs, so this ensures that either all scanExternalRefs
173-
// relocations were flushed or PatchEntries ran.
174-
175-
BinarySection &BS = BC->registerOrUpdateSection(
176-
".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
177-
// Create symbol 'Func0x4'
178-
MCSymbol *RelSymbol = BC->getOrCreateGlobalSymbol(4, "Func");
179-
ASSERT_TRUE(RelSymbol);
180-
Relocation Reloc{8, RelSymbol, ELF::R_AARCH64_CALL26, 0, 0};
181-
Reloc.setOptional();
182-
BS.addPendingRelocation(Reloc);
183-
184-
SmallVector<char> Vect;
185-
raw_svector_ostream OS(Vect);
186-
187-
// Resolve relocation symbol to a high value so encoding will be out of range.
188-
EXPECT_EXIT(BS.flushPendingRelocations(
189-
OS, [&](const MCSymbol *S) { return 0x800000F; }),
190-
::testing::ExitedWithCode(1),
191-
"BOLT-ERROR: cannot encode relocation for symbol Func0x4 as it is"
192-
" out-of-range. To proceed must use -force-patch");
193-
}
194-
195165
TEST_P(BinaryContextTester,
196166
FlushOptionalOutOfRangePendingRelocCALL26_ForcePatchOn) {
197167
if (GetParam() != Triple::aarch64)

0 commit comments

Comments
 (0)