Skip to content

Commit deae5ee

Browse files
authored
[lld-macho] Fix branch extension logic compatibility with __objc_stubs (#137913)
Enhance branch extension logic to handle `__objc_stubs` identically to `__stubs`.

The branch extension algorithm currently has specific handling for the `__stubs` section:

1. It ensures all `__stubs` content is directly reachable via branches from the text section.
2. It calculates the latest text section address that might require thunks to reach the end of `__stubs`.

The `__objc_stubs` section requires precisely the same handling due to its similar nature, but this was not implemented. This commit generalizes the existing logic so that it applies consistently to both the `__stubs` and `__objc_stubs` sections, ensuring correct reachability and thunk placement for both.

Without this change, it is possible to get relocation errors during linking in scenarios where the `__objc_stubs` section is large enough.
1 parent b6d4b7e commit deae5ee

File tree

3 files changed

+99
-33
lines changed

3 files changed

+99
-33
lines changed

lld/MachO/ConcatOutputSection.cpp

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,16 @@ bool TextOutputSection::needsThunks() const {
133133
// we've already processed in this segment needs thunks, so do the
134134
// rest.
135135
bool needsThunks = parent && parent->needsThunks;
136+
137+
// Calculate the total size of all branch target sections
138+
uint64_t branchTargetsSize = in.stubs->getSize();
139+
140+
// Add the size of __objc_stubs section if it exists
141+
if (in.objcStubs && in.objcStubs->isNeeded())
142+
branchTargetsSize += in.objcStubs->getSize();
143+
136144
if (!needsThunks &&
137-
isecAddr - addr + in.stubs->getSize() <=
145+
isecAddr - addr + branchTargetsSize <=
138146
std::min(target->backwardBranchRange, target->forwardBranchRange))
139147
return false;
140148
// Yes, this program is large enough to need thunks.
@@ -148,11 +156,11 @@ bool TextOutputSection::needsThunks() const {
148156
auto *sym = cast<Symbol *>(r.referent);
149157
// Pre-populate the thunkMap and memoize call site counts for every
150158
// InputSection and ThunkInfo. We do this for the benefit of
151-
// estimateStubsInRangeVA().
159+
// estimateBranchTargetThresholdVA().
152160
ThunkInfo &thunkInfo = thunkMap[sym];
153161
// Knowing ThunkInfo call site count will help us know whether or not we
154162
// might need to create more for this referent at the time we are
155-
// estimating distance to __stubs in estimateStubsInRangeVA().
163+
// estimating distance to __stubs in estimateBranchTargetThresholdVA().
156164
++thunkInfo.callSiteCount;
157165
// We can avoid work on InputSections that have no BRANCH relocs.
158166
isec->hasCallSites = true;
@@ -161,19 +169,20 @@ bool TextOutputSection::needsThunks() const {
161169
return true;
162170
}
163171

164-
// Since __stubs is placed after __text, we must estimate the address
165-
// beyond which stubs are within range of a simple forward branch.
166-
// This is called exactly once, when the last input section has been finalized.
167-
uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
172+
// Estimate the address beyond which branch targets (like __stubs and
173+
// __objc_stubs) are within range of a simple forward branch. This is called
174+
// exactly once, when the last input section has been finalized.
175+
uint64_t
176+
TextOutputSection::estimateBranchTargetThresholdVA(size_t callIdx) const {
168177
// Tally the functions which still have call sites remaining to process,
169178
// which yields the maximum number of thunks we might yet place.
170179
size_t maxPotentialThunks = 0;
171180
for (auto &tp : thunkMap) {
172181
ThunkInfo &ti = tp.second;
173182
// This overcounts: Only sections that are in forward jump range from the
174183
// currently-active section get finalized, and all input sections are
175-
// finalized when estimateStubsInRangeVA() is called. So only backward
176-
// jumps will need thunks, but we count all jumps.
184+
// finalized when estimateBranchTargetThresholdVA() is called. So only
185+
// backward jumps will need thunks, but we count all jumps.
177186
if (ti.callSitesUsed < ti.callSiteCount)
178187
maxPotentialThunks += 1;
179188
}
@@ -200,7 +209,8 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
200209
assert(isecEnd - isecVA <= forwardBranchRange &&
201210
"should only finalize sections in jump range");
202211

203-
// Estimate the maximum size of the code, right before the stubs section.
212+
// Estimate the maximum size of the code, right before the branch target
213+
// sections.
204214
uint64_t maxTextSize = 0;
205215
// Add the size of all the inputs, including the unprocessed ones.
206216
maxTextSize += isecEnd;
@@ -214,21 +224,35 @@ uint64_t TextOutputSection::estimateStubsInRangeVA(size_t callIdx) const {
214224
// 'maxPotentialThunks' overcounts, this is an estimate of the upper limit.
215225
maxTextSize += maxPotentialThunks * target->thunkSize;
216226

217-
// Estimated maximum VA of last stub.
218-
uint64_t maxVAOfLastStub = maxTextSize + in.stubs->getSize();
227+
// Calculate the total size of all late branch target sections
228+
uint64_t branchTargetsSize = 0;
229+
230+
// Add the size of __stubs section
231+
branchTargetsSize += in.stubs->getSize();
232+
233+
// Add the size of __objc_stubs section if it exists
234+
if (in.objcStubs && in.objcStubs->isNeeded())
235+
branchTargetsSize += in.objcStubs->getSize();
236+
237+
// Estimated maximum VA of the last branch target.
238+
uint64_t maxVAOfLastBranchTarget = maxTextSize + branchTargetsSize;
219239

220-
// Estimate the address after which call sites can safely call stubs
240+
// Estimate the address after which call sites can safely call branch targets
221241
// directly rather than through intermediary thunks.
222-
uint64_t stubsInRangeVA = maxVAOfLastStub - forwardBranchRange;
242+
uint64_t branchTargetThresholdVA =
243+
maxVAOfLastBranchTarget - forwardBranchRange;
223244

224245
log("thunks = " + std::to_string(thunkMap.size()) +
225246
", potential = " + std::to_string(maxPotentialThunks) +
226-
", stubs = " + std::to_string(in.stubs->getSize()) + ", isecVA = " +
227-
utohexstr(isecVA) + ", threshold = " + utohexstr(stubsInRangeVA) +
228-
", isecEnd = " + utohexstr(isecEnd) +
247+
", stubs = " + std::to_string(in.stubs->getSize()) +
248+
(in.objcStubs && in.objcStubs->isNeeded()
249+
? ", objc_stubs = " + std::to_string(in.objcStubs->getSize())
250+
: "") +
251+
", isecVA = " + utohexstr(isecVA) + ", threshold = " +
252+
utohexstr(branchTargetThresholdVA) + ", isecEnd = " + utohexstr(isecEnd) +
229253
", tail = " + utohexstr(isecEnd - isecVA) +
230254
", slop = " + utohexstr(forwardBranchRange - (isecEnd - isecVA)));
231-
return stubsInRangeVA;
255+
return branchTargetThresholdVA;
232256
}
233257

234258
void ConcatOutputSection::finalizeOne(ConcatInputSection *isec) {
@@ -254,7 +278,7 @@ void TextOutputSection::finalize() {
254278

255279
uint64_t forwardBranchRange = target->forwardBranchRange;
256280
uint64_t backwardBranchRange = target->backwardBranchRange;
257-
uint64_t stubsInRangeVA = TargetInfo::outOfRangeVA;
281+
uint64_t branchTargetThresholdVA = TargetInfo::outOfRangeVA;
258282
size_t thunkSize = target->thunkSize;
259283
size_t relocCount = 0;
260284
size_t callSiteCount = 0;
@@ -297,16 +321,18 @@ void TextOutputSection::finalize() {
297321
if (!isec->hasCallSites)
298322
continue;
299323

300-
if (finalIdx == endIdx && stubsInRangeVA == TargetInfo::outOfRangeVA) {
301-
// When we have finalized all input sections, __stubs (destined
302-
// to follow __text) comes within range of forward branches and
303-
// we can estimate the threshold address after which we can
304-
// reach any stub with a forward branch. Note that although it
305-
// sits in the middle of a loop, this code executes only once.
324+
if (finalIdx == endIdx &&
325+
branchTargetThresholdVA == TargetInfo::outOfRangeVA) {
326+
// When we have finalized all input sections, branch target sections (like
327+
// __stubs and __objc_stubs) (destined to follow __text) come within range
328+
// of forward branches and we can estimate the threshold address after
329+
// which we can reach any branch target with a forward branch. Note that
330+
// although it sits in the middle of a loop, this code executes only once.
306331
// It is in the loop because we need to call it at the proper
307332
// time: the earliest call site from which the end of __text
308-
// (and start of __stubs) comes within range of a forward branch.
309-
stubsInRangeVA = estimateStubsInRangeVA(callIdx);
333+
// (and start of branch target sections) comes within range of a forward
334+
// branch.
335+
branchTargetThresholdVA = estimateBranchTargetThresholdVA(callIdx);
310336
}
311337
// Process relocs by ascending address, i.e., ascending offset within isec
312338
std::vector<Reloc> &relocs = isec->relocs;
@@ -328,10 +354,14 @@ void TextOutputSection::finalize() {
328354
auto *funcSym = cast<Symbol *>(r.referent);
329355
ThunkInfo &thunkInfo = thunkMap[funcSym];
330356
// The referent is not reachable, so we need to use a thunk ...
331-
if (funcSym->isInStubs() && callVA >= stubsInRangeVA) {
357+
if ((funcSym->isInStubs() ||
358+
(in.objcStubs && in.objcStubs->isNeeded() &&
359+
ObjCStubsSection::isObjCStubSymbol(funcSym))) &&
360+
callVA >= branchTargetThresholdVA) {
332361
assert(callVA != TargetInfo::outOfRangeVA);
333-
// ... Oh, wait! We are close enough to the end that __stubs
334-
// are now within range of a simple forward branch.
362+
// ... Oh, wait! We are close enough to the end that branch target
363+
// sections (__stubs, __objc_stubs) are now within range of a simple
364+
// forward branch.
335365
continue;
336366
}
337367
uint64_t funcVA = funcSym->resolveBranchVA();

lld/MachO/ConcatOutputSection.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class TextOutputSection : public ConcatOutputSection {
8080
}
8181

8282
private:
83-
uint64_t estimateStubsInRangeVA(size_t callIdx) const;
83+
uint64_t estimateBranchTargetThresholdVA(size_t callIdx) const;
8484

8585
std::vector<ConcatInputSection *> thunks;
8686
};

lld/test/MachO/arm64-thunks.s

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,28 @@
1818
## with safe_thunks ICF.
1919
# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks
2020
# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s
21+
# RUN: llvm-objdump --macho --section-headers %t/thunk > %t/headers.txt
22+
# RUN: llvm-otool -vs __DATA __objc_selrefs %t/thunk >> %t/headers.txt
23+
# RUN: llvm-otool -vs __TEXT __objc_stubs %t/thunk >> %t/headers.txt
24+
# RUN: FileCheck %s --check-prefix=OBJC < %t/headers.txt
2125

2226
# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP
23-
27+
28+
# OBJC: Sections:
29+
# OBJC: __text
30+
# OBJC-NEXT: __lcxx_override
31+
# OBJC-NEXT: __stubs
32+
# OBJC-NEXT: __stub_helper
33+
# OBJC-NEXT: __objc_stubs
34+
35+
# OBJC: Contents of (__DATA,__objc_selrefs) section
36+
# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:foo
37+
# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:bar
38+
39+
# OBJC: Contents of (__TEXT,__objc_stubs) section
40+
# OBJC: _objc_msgSend$bar:
41+
# OBJC: _objc_msgSend$foo:
42+
2443
# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr
2544
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a
2645
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b
@@ -45,7 +64,6 @@
4564
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1
4665
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1
4766
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0
48-
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ltmp0.thunk.0
4967
# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z
5068

5169

@@ -206,6 +224,22 @@
206224

207225
# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>:
208226

227+
.section __TEXT,__objc_methname,cstring_literals
228+
lselref1:
229+
.asciz "foo"
230+
lselref2:
231+
.asciz "bar"
232+
233+
.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip
234+
.p2align 3
235+
.quad lselref1
236+
.quad lselref2
237+
238+
.text
239+
.globl _objc_msgSend
240+
_objc_msgSend:
241+
ret
242+
209243
.subsections_via_symbols
210244

211245
.addrsig
@@ -352,6 +386,8 @@ _main:
352386
bl _fold_func_low_addr
353387
bl _fold_func_high_addr
354388
bl ___nan
389+
bl _objc_msgSend$foo
390+
bl _objc_msgSend$bar
355391
ret
356392

357393
.globl _fold_func_high_addr

0 commit comments

Comments
 (0)