Skip to content

Commit 105ecd8

Browse files
authored
[BOLT] Avoid EH trampolines for PIEs/DSOs (#117106)
We used to emit EH trampolines for PIE/DSO whenever a function fragment contained a landing pad outside of it. However, it is common to have all landing pads in a cold fragment even when their throwers are in a hot one. To reduce the number of trampolines, analyze landing pads for any given function fragment, and if they all belong to the same (possibly different) fragment, designate that fragment as a landing pad fragment for the "thrower" fragment. Later, emit landing pad fragment symbol as an LPStart for the thrower LSDA.
1 parent dc580c9 commit 105ecd8

File tree

6 files changed

+200
-34
lines changed

6 files changed

+200
-34
lines changed

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,11 @@ class BinaryFunction {
527527
/// fragment of the function.
528528
SmallVector<MCSymbol *, 0> LSDASymbols;
529529

530+
/// Each function fragment may have another fragment containing all landing
531+
/// pads for it. If that's the case, the LP fragment will be stored in the
532+
/// vector below with indexing starting with the main fragment.
533+
SmallVector<std::optional<FragmentNum>, 0> LPFragments;
534+
530535
/// Map to discover which CFIs are attached to a given instruction offset.
531536
/// Maps an instruction offset into a FrameInstructions offset.
532537
/// This is only relevant to the buildCFG phase and is discarded afterwards.
@@ -1885,6 +1890,42 @@ class BinaryFunction {
18851890
return LSDASymbols[F.get()];
18861891
}
18871892

1893+
/// If all landing pads for the function fragment \p F are located in fragment
1894+
/// \p LPF, designate \p LPF as a landing-pad fragment for \p F. Passing
1895+
/// std::nullopt in LPF means that landing pads for \p F are located in more
1896+
/// than one fragment.
1897+
void setLPFragment(const FragmentNum F, std::optional<FragmentNum> LPF) {
1898+
if (F.get() >= LPFragments.size())
1899+
LPFragments.resize(F.get() + 1);
1900+
1901+
LPFragments[F.get()] = LPF;
1902+
}
1903+
1904+
/// If function fragment \p F has a designated landing pad fragment, i.e. a
1905+
/// fragment that contains all landing pads for throwers in \p F, then return
1906+
/// that landing pad fragment number. If \p F does not need landing pads,
1907+
/// return \p F. Return std::nullopt if landing pads for \p F are scattered among
1908+
/// several function fragments.
1909+
std::optional<FragmentNum> getLPFragment(const FragmentNum F) {
1910+
if (!isSplit()) {
1911+
assert(F == FragmentNum::main() && "Invalid fragment number");
1912+
return FragmentNum::main();
1913+
}
1914+
1915+
if (F.get() >= LPFragments.size())
1916+
return std::nullopt;
1917+
1918+
return LPFragments[F.get()];
1919+
}
1920+
1921+
/// Return a symbol corresponding to a landing pad fragment for fragment \p F.
1922+
/// See getLPFragment().
1923+
MCSymbol *getLPStartSymbol(const FragmentNum F) {
1924+
if (std::optional<FragmentNum> LPFragment = getLPFragment(F))
1925+
return getSymbol(*LPFragment);
1926+
return nullptr;
1927+
}
1928+
18881929
void setOutputDataAddress(uint64_t Address) { OutputDataOffset = Address; }
18891930

18901931
uint64_t getOutputDataAddress() const { return OutputDataOffset; }

bolt/lib/Core/BinaryEmitter.cpp

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ class BinaryEmitter {
140140

141141
void emitCFIInstruction(const MCCFIInstruction &Inst) const;
142142

143-
/// Emit exception handling ranges for the function.
143+
/// Emit exception handling ranges for the function fragment.
144144
void emitLSDA(BinaryFunction &BF, const FunctionFragment &FF);
145145

146146
/// Emit line number information corresponding to \p NewLoc. \p PrevLoc
@@ -915,15 +915,15 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
915915
// Emit the LSDA header.
916916

917917
// If LPStart is omitted, then the start of the FDE is used as a base for
918-
// landing pad displacements. Then, if a cold fragment starts with a landing
919-
// pad, this means that the first landing pad offset will be 0. However, C++
920-
// runtime treats 0 as if there is no landing pad present, thus we *must* emit
921-
// non-zero offsets for all valid LPs.
918+
// landing pad displacements. Then, if a cold fragment starts with
919+
// a landing pad, this means that the first landing pad offset will be 0.
920+
// However, C++ runtime will treat 0 as if there is no landing pad, thus we
921+
// cannot emit LP offset as 0.
922922
//
923923
// As a solution, for fixed-address binaries we set LPStart to 0, and for
924-
// position-independent binaries we set LP start to FDE start minus one byte
925-
// for FDEs that start with a landing pad.
926-
const bool NeedsLPAdjustment = !FF.empty() && FF.front()->isLandingPad();
924+
// position-independent binaries we offset LP start by one byte.
925+
bool NeedsLPAdjustment = false;
926+
const MCSymbol *LPStartSymbol = nullptr;
927927
std::function<void(const MCSymbol *)> emitLandingPad;
928928
if (BC.HasFixedLoadAddress) {
929929
Streamer.emitIntValue(dwarf::DW_EH_PE_udata4, 1); // LPStart format
@@ -935,17 +935,26 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
935935
Streamer.emitIntValue(0, 4);
936936
};
937937
} else {
938-
if (NeedsLPAdjustment) {
939-
// Use relative LPStart format and emit LPStart as [SymbolStart - 1].
938+
std::optional<FragmentNum> LPFN = BF.getLPFragment(FF.getFragmentNum());
939+
LPStartSymbol = BF.getLPStartSymbol(FF.getFragmentNum());
940+
assert(LPFN && LPStartSymbol && "Expected LPStart symbol to be set");
941+
942+
const FunctionFragment &LPFragment = BF.getLayout().getFragment(*LPFN);
943+
NeedsLPAdjustment =
944+
(!LPFragment.empty() && LPFragment.front()->isLandingPad());
945+
946+
// Emit LPStart encoding and optionally LPStart.
947+
if (NeedsLPAdjustment || LPStartSymbol != StartSymbol) {
940948
Streamer.emitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1);
941949
MCSymbol *DotSymbol = BC.Ctx->createTempSymbol("LPBase");
942950
Streamer.emitLabel(DotSymbol);
943951

944952
const MCExpr *LPStartExpr = MCBinaryExpr::createSub(
945-
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx),
953+
MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx),
946954
MCSymbolRefExpr::create(DotSymbol, *BC.Ctx), *BC.Ctx);
947-
LPStartExpr = MCBinaryExpr::createSub(
948-
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
955+
if (NeedsLPAdjustment)
956+
LPStartExpr = MCBinaryExpr::createSub(
957+
LPStartExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);
949958
Streamer.emitValue(LPStartExpr, 4);
950959
} else {
951960
// DW_EH_PE_omit means FDE start (StartSymbol) will be used as LPStart.
@@ -955,7 +964,7 @@ void BinaryEmitter::emitLSDA(BinaryFunction &BF, const FunctionFragment &FF) {
955964
if (LPSymbol) {
956965
const MCExpr *LPOffsetExpr = MCBinaryExpr::createSub(
957966
MCSymbolRefExpr::create(LPSymbol, *BC.Ctx),
958-
MCSymbolRefExpr::create(StartSymbol, *BC.Ctx), *BC.Ctx);
967+
MCSymbolRefExpr::create(LPStartSymbol, *BC.Ctx), *BC.Ctx);
959968
if (NeedsLPAdjustment)
960969
LPOffsetExpr = MCBinaryExpr::createAdd(
961970
LPOffsetExpr, MCConstantExpr::create(1, *BC.Ctx), *BC.Ctx);

bolt/lib/Passes/SplitFunctions.cpp

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -901,8 +901,43 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
901901
// have to be placed in the same fragment. When we split them, create
902902
// trampoline landing pads that will redirect the execution to real LPs.
903903
TrampolineSetType Trampolines;
904-
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
905-
Trampolines = createEHTrampolines(BF);
904+
if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit()) {
905+
// If all landing pads for this fragment are grouped in one (potentially
906+
// different) fragment, we can set LPStart to the start of that fragment
907+
// and avoid trampoline code.
908+
bool NeedsTrampolines = false;
909+
for (FunctionFragment &FF : BF.getLayout().fragments()) {
910+
// Vector of fragments that contain landing pads for this fragment.
911+
SmallVector<FragmentNum, 4> LandingPadFragments;
912+
for (const BinaryBasicBlock *BB : FF)
913+
for (const BinaryBasicBlock *LPB : BB->landing_pads())
914+
LandingPadFragments.push_back(LPB->getFragmentNum());
915+
916+
// Eliminate duplicate entries from the vector.
917+
llvm::sort(LandingPadFragments);
918+
auto Last = llvm::unique(LandingPadFragments);
919+
LandingPadFragments.erase(Last, LandingPadFragments.end());
920+
921+
if (LandingPadFragments.size() == 0) {
922+
// If the fragment has no landing pads, we can safely set itself as its
923+
// landing pad fragment.
924+
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
925+
} else if (LandingPadFragments.size() == 1) {
926+
BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front());
927+
} else {
928+
NeedsTrampolines = true;
929+
break;
930+
}
931+
}
932+
933+
// Trampolines guarantee that all landing pads for any given fragment will
934+
// be contained in the same fragment.
935+
if (NeedsTrampolines) {
936+
for (FunctionFragment &FF : BF.getLayout().fragments())
937+
BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
938+
Trampolines = createEHTrampolines(BF);
939+
}
940+
}
906941

907942
// Check the new size to see if it's worth splitting the function.
908943
if (BC.isX86() && LayoutUpdated) {
@@ -933,6 +968,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
933968
}
934969
}
935970

971+
// Restore LP fragment for the main fragment if the splitting was undone.
972+
if (BF.hasEHRanges() && !BF.isSplit())
973+
BF.setLPFragment(FragmentNum::main(), FragmentNum::main());
974+
936975
// Fix branches if the splitting decision of the pass after function
937976
// reordering is different from that of the pass before function reordering.
938977
if (LayoutUpdated && BC.HasFinalizedFunctionOrder)

bolt/test/X86/pie-eh-split-undo.s

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# REQUIRES: system-linux
2+
3+
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
4+
# RUN: link_fdata %s %t.o %t.fdata
5+
# RUN: llvm-strip --strip-unneeded %t.o
6+
# RUN: ld.lld --pie %t.o -o %t.exe -q
7+
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --split-functions --split-eh \
8+
# RUN: --split-all-cold --print-after-lowering --print-only=_start 2>&1 \
9+
# RUN: | FileCheck %s
10+
11+
## _start has two landing pads: one hot and one cold. Hence, BOLT will introduce
12+
## a landing pad trampoline. However, the trampoline code will make the main
13+
## split fragment larger than the whole function before split. Then BOLT will
14+
## undo the splitting and remove the trampoline.
15+
16+
# CHECK: Binary Function "_start"
17+
# CHECK: IsSplit :
18+
# CHECK-SAME: 0
19+
20+
## Check that a landing pad trampoline was created, but contains no instructions
21+
## and falls through to the real landing pad.
22+
23+
# CHECK: {{^[^[:space:]]+}} (0 instructions
24+
# CHECK-NEXT: Landing Pad{{$}}
25+
# CHECK: Exec Count
26+
# CHECK-SAME: : 0
27+
# CHECK: Successors:
28+
# CHECK-SAME: [[LP:[^[:space:]]+]]
29+
# CHECK-EMPTY:
30+
# CHECK-NEXT: [[LP]]
31+
32+
.text
33+
.global foo
34+
.type foo, %function
35+
foo:
36+
.cfi_startproc
37+
ret
38+
.cfi_endproc
39+
.size foo, .-foo
40+
41+
.globl _start
42+
.type _start, %function
43+
_start:
44+
# FDATA: 0 [unknown] 0 1 _start 0 1 100
45+
.Lfunc_begin0:
46+
.cfi_startproc
47+
.cfi_lsda 27, .Lexception0
48+
call foo
49+
.Ltmp0:
50+
call foo
51+
.Ltmp1:
52+
ret
53+
54+
## Cold landing pad.
55+
.LLP1:
56+
ret
57+
58+
## Hot landing pad.
59+
LLP0:
60+
# FDATA: 0 [unknown] 0 1 _start #LLP0# 1 100
61+
ret
62+
63+
.cfi_endproc
64+
.Lfunc_end0:
65+
.size _start, .-_start
66+
67+
## EH table.
68+
.section .gcc_except_table,"a",@progbits
69+
.p2align 2
70+
GCC_except_table0:
71+
.Lexception0:
72+
.byte 255 # @LPStart Encoding = omit
73+
.byte 255 # @TType Encoding = omit
74+
.byte 1 # Call site Encoding = uleb128
75+
.uleb128 .Lcst_end0-.Lcst_begin0
76+
.Lcst_begin0:
77+
.uleb128 .Lfunc_begin0-.Lfunc_begin0 # >> Call Site 1 <<
78+
.uleb128 .Ltmp0-.Lfunc_begin0 # Call between .Lfunc_begin0 and .Ltmp0
79+
.uleb128 LLP0-.Lfunc_begin0 # jumps to LLP0
80+
.byte 0 # On action: cleanup
81+
.uleb128 .Ltmp0-.Lfunc_begin0 # >> Call Site 2 <<
82+
.uleb128 .Ltmp1-.Ltmp0 # Call between .Ltmp0 and .Ltmp1
83+
.uleb128 .LLP1-.Lfunc_begin0 # jumps to .LLP1
84+
.byte 0 # On action: cleanup
85+
.Lcst_end0:
86+

bolt/test/runtime/X86/Inputs/pie-exceptions-failed-split.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Assembly generated from building the followingC++ code with the following
1+
# Assembly generated from building the following C++ code with the following
22
# command using trunk clang. Then, basic block at .LBB1_7 was moved before the
33
# landing pad.
44
#

bolt/test/runtime/X86/pie-exceptions-failed-split.test renamed to bolt/test/runtime/X86/pie-exceptions-split.test

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,16 @@ RUN: llvm-bolt %t -o %t.bolt --data %t.fdata --reorder-blocks=ext-tsp \
1111
RUN: --split-functions --split-eh --print-after-lowering \
1212
RUN: --print-only=_Z10throw_testiPPc 2>&1 | FileCheck %s
1313

14-
## Hot code in the test case gets larger after splitting because of jump
15-
## instruction relaxation. Check that BOLT reverts the split correctly.
14+
## Check that a landing pad is split from its thrower and does not require a
15+
## trampoline LP.
1616
CHECK: Binary Function "_Z10throw_testiPPc"
1717
CHECK: IsSplit :
18-
CHECK-SAME: 0
19-
20-
## Check that the landing pad trampoline was created, but contains no
21-
## instructions and falls to the real landing pad.
22-
CHECK: {{^[^[:space:]]+}} (0 instructions
23-
CHECK-NEXT: Landing Pad{{$}}
24-
CHECK: Exec Count
25-
CHECK-SAME: : 0
26-
CHECK: Successors:
27-
CHECK-SAME: [[LP:[^[:space:]]+]]
28-
CHECK-EMPTY:
29-
CHECK-NEXT: [[LP]]
30-
CHECK-DAG: Exec Count
31-
CHECK-NOT: Exec Count
32-
CHECK-DAG: callq __cxa_begin_catch
18+
CHECK-SAME: 1
19+
CHECK: callq {{.*}} # handler: [[LPAD:.*]];
20+
CHECK-NOT: Landing Pad{{$}}
21+
CHECK: HOT-COLD SPLIT POINT
22+
CHECK: {{^}}[[LPAD]]
23+
CHECK-NEXT: Landing Pad
3324

3425
## Verify the output still executes correctly when the exception path is being
3526
## taken.

0 commit comments

Comments
 (0)