Skip to content

Commit 162814a

Browse files
authored
[lld-macho] Include branch extension thunks in linker map (#120496)
This patch extends the MachO linker's map file generation to include branch extension thunk symbols. Previously, thunks were omitted from the map file, making it difficult to understand the final layout of the binary, especially when debugging issues related to long branch thunks. This change ensures thunks are included and correctly interleaved with other symbols based on their address, providing an accurate representation of the linked output.
1 parent b4ae419 commit 162814a

File tree

4 files changed

+64
-6
lines changed

4 files changed

+64
-6
lines changed

lld/MachO/ConcatOutputSection.h

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ class Defined;
2525
// in the final binary.
2626
class ConcatOutputSection : public OutputSection {
2727
public:
28-
explicit ConcatOutputSection(StringRef name)
29-
: OutputSection(ConcatKind, name) {}
28+
explicit ConcatOutputSection(StringRef name,
29+
OutputSection::Kind kind = ConcatKind)
30+
: OutputSection(kind, name) {}
3031

3132
const ConcatInputSection *firstSection() const { return inputs.front(); }
3233
const ConcatInputSection *lastSection() const { return inputs.back(); }
@@ -46,7 +47,7 @@ class ConcatOutputSection : public OutputSection {
4647
void writeTo(uint8_t *buf) const override;
4748

4849
static bool classof(const OutputSection *sec) {
49-
return sec->kind() == ConcatKind;
50+
return sec->kind() == ConcatKind || sec->kind() == TextKind;
5051
}
5152

5253
static ConcatOutputSection *getOrCreateForInput(const InputSection *);
@@ -66,12 +67,18 @@ class ConcatOutputSection : public OutputSection {
6667
// support thunk insertion.
6768
class TextOutputSection : public ConcatOutputSection {
6869
public:
69-
explicit TextOutputSection(StringRef name) : ConcatOutputSection(name) {}
70+
explicit TextOutputSection(StringRef name)
71+
: ConcatOutputSection(name, TextKind) {}
7072
void finalizeContents() override {}
7173
void finalize() override;
7274
bool needsThunks() const;
75+
ArrayRef<ConcatInputSection *> getThunks() const { return thunks; }
7376
void writeTo(uint8_t *buf) const override;
7477

78+
static bool classof(const OutputSection *sec) {
79+
return sec->kind() == TextKind;
80+
}
81+
7582
private:
7683
uint64_t estimateStubsInRangeVA(size_t callIdx) const;
7784

lld/MachO/MapFile.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,20 @@ static uint64_t getSymSizeForMap(Defined *sym) {
161161
return sym->size;
162162
}
163163

164+
// Merges two vectors of input sections in order of their outSecOff values.
165+
// This approach creates a new (temporary) vector, which is not ideal, but
166+
// merging without the temporary would require a lot of code duplication.
167+
static std::vector<ConcatInputSection *>
168+
mergeOrderedInputs(ArrayRef<ConcatInputSection *> inputs1,
169+
ArrayRef<ConcatInputSection *> inputs2) {
170+
std::vector<ConcatInputSection *> vec(inputs1.size() + inputs2.size());
171+
std::merge(inputs1.begin(), inputs1.end(), inputs2.begin(), inputs2.end(),
172+
vec.begin(), [](ConcatInputSection *a, ConcatInputSection *b) {
173+
return a->outSecOff < b->outSecOff;
174+
});
175+
return vec;
176+
}
177+
164178
void macho::writeMapFile() {
165179
if (config->mapFile.empty())
166180
return;
@@ -220,7 +234,11 @@ void macho::writeMapFile() {
220234
os << "# Address\tSize \tFile Name\n";
221235
for (const OutputSegment *seg : outputSegments) {
222236
for (const OutputSection *osec : seg->getSections()) {
223-
if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
237+
if (auto *textOsec = dyn_cast<TextOutputSection>(osec)) {
238+
auto inputsAndThunks =
239+
mergeOrderedInputs(textOsec->inputs, textOsec->getThunks());
240+
printIsecArrSyms(inputsAndThunks);
241+
} else if (auto *concatOsec = dyn_cast<ConcatOutputSection>(osec)) {
224242
printIsecArrSyms(concatOsec->inputs);
225243
} else if (osec == in.cStringSection || osec == in.objcMethnameSection) {
226244
const auto &liveCStrings = info.liveCStringsForSection.lookup(osec);

lld/MachO/OutputSection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class OutputSection {
3737
enum Kind {
3838
ConcatKind,
3939
SyntheticKind,
40+
TextKind,
4041
};
4142

4243
OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {}

lld/test/MachO/arm64-thunks.s

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,46 @@
88
## (4) early calls to a dylib stub use a thunk, and later calls the stub
99
## directly
1010
## (5) Thunks are created for all sections in the text segment with branches.
11+
## (6) Thunks are in the linker map file.
1112
## Notes:
1213
## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range
1314

1415
# RUN: rm -rf %t; mkdir %t
1516
# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o
16-
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -o %t/thunk %t/input.o
17+
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o
1718
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
1819

20+
## Check that the thunks appear in the map file and that everything is sorted by address
21+
# Because of the `.space` directives, there will be a lot of dead symbols in the
22+
# linker map (the linker map will be ~2.7GB). To avoid having the test (slowly) match regexes
23+
# across all ~2.7GB of the linker map, generate a version of the linker map without dead symbols.
24+
# RUN: awk '/# Dead Stripped Symbols:/ {exit} {print}' %t/thunk.map > %t/thunk_no_dead_syms.map
25+
26+
# RUN: FileCheck %s --input-file %t/thunk_no_dead_syms.map --check-prefix=MAP
27+
28+
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _b
29+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c
30+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.0
31+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.0
32+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.0
33+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _g.thunk.0
34+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h.thunk.0
35+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ___nan.thunk.0
36+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d
37+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e
38+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f
39+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _g
40+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a.thunk.0
41+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0
42+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h
43+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main
44+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0
45+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1
46+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
47+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
48+
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
49+
50+
1951
# CHECK: Disassembly of section __TEXT,__text:
2052

2153
# CHECK: [[#%.13x, A_PAGE:]][[#%.3x, A_OFFSET:]] <_a>:

0 commit comments

Comments
 (0)