@@ -231,36 +231,71 @@ static void writePltHeaderLong(uint8_t *buf) {
231
231
// The default PLT header requires the .got.plt to be within 128 Mb of the
232
232
// .plt in the positive direction.
233
233
void ARM::writePltHeader(uint8_t *buf) const {
234
- // Use a similar sequence to that in writePlt(), the difference is the calling
235
- // conventions mean we use lr instead of ip. The PLT entry is responsible for
236
- // saving lr on the stack, the dynamic loader is responsible for reloading
237
- // it.
238
- const uint32_t pltData[] = {
239
- 0xe52de004, // L1: str lr, [sp,#-4]!
240
- 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
241
- 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
242
- 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
243
- };
244
-
245
- uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
246
- if (!llvm::isUInt<27>(offset)) {
247
- // We cannot encode the Offset, use the long form.
248
- writePltHeaderLong(buf);
249
- return;
234
+ if (config->armThumbPLTs) {
235
+ // The instruction sequence for thumb:
236
+ //
237
+ // 0: b500 push {lr}
238
+ // 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe>
239
+ // 6: 44fe add lr, pc
240
+ // 8: f85e ff08 ldr pc, [lr, #8]!
241
+ // e: .word .got.plt - .plt - 16
242
+ //
243
+ // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
244
+ // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
245
+ //
246
+ uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
247
+ assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
248
+ write16(buf + 0, 0xb500);
249
+ // Split into two halves to support endianness correctly.
250
+ write16(buf + 2, 0xf8df);
251
+ write16(buf + 4, 0xe008);
252
+ write16(buf + 6, 0x44fe);
253
+ // Split into two halves to support endianness correctly.
254
+ write16(buf + 8, 0xf85e);
255
+ write16(buf + 10, 0xff08);
256
+ write32(buf + 12, offset);
257
+
258
+ memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
259
+ memcpy(buf + 20, trapInstr.data(), 4);
260
+ memcpy(buf + 24, trapInstr.data(), 4);
261
+ memcpy(buf + 28, trapInstr.data(), 4);
262
+ } else {
263
+ // Use a similar sequence to that in writePlt(), the difference is the
264
+ // calling conventions mean we use lr instead of ip. The PLT entry is
265
+ // responsible for saving lr on the stack, the dynamic loader is responsible
266
+ // for reloading it.
267
+ const uint32_t pltData[] = {
268
+ 0xe52de004, // L1: str lr, [sp,#-4]!
269
+ 0xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)
270
+ 0xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)
271
+ 0xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt - L1 - 4)
272
+ };
273
+
274
+ uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
275
+ if (!llvm::isUInt<27>(offset)) {
276
+ // We cannot encode the Offset, use the long form.
277
+ writePltHeaderLong(buf);
278
+ return;
279
+ }
280
+ write32(buf + 0, pltData[0]);
281
+ write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
282
+ write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
283
+ write32(buf + 12, pltData[3] | (offset & 0xfff));
284
+ memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
285
+ memcpy(buf + 20, trapInstr.data(), 4);
286
+ memcpy(buf + 24, trapInstr.data(), 4);
287
+ memcpy(buf + 28, trapInstr.data(), 4);
250
288
}
251
- write32(buf + 0, pltData[0]);
252
- write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
253
- write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
254
- write32(buf + 12, pltData[3] | (offset & 0xfff));
255
- memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
256
- memcpy(buf + 20, trapInstr.data(), 4);
257
- memcpy(buf + 24, trapInstr.data(), 4);
258
- memcpy(buf + 28, trapInstr.data(), 4);
259
289
}
260
290
261
291
void ARM::addPltHeaderSymbols(InputSection &isec) const {
262
- addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
263
- addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
292
+ if (config->armThumbPLTs) {
293
+ addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);
294
+ addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);
295
+ } else {
296
+ addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);
297
+ addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);
298
+ }
264
299
}
265
300
266
301
// Long form PLT entries that do not have any restrictions on the displacement
@@ -279,32 +314,65 @@ static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
279
314
// .plt in the positive direction.
280
315
void ARM::writePlt(uint8_t *buf, const Symbol &sym,
281
316
uint64_t pltEntryAddr) const {
282
- // The PLT entry is similar to the example given in Appendix A of ELF for
283
- // the Arm Architecture. Instead of using the Group Relocations to find the
284
- // optimal rotation for the 8-bit immediate used in the add instructions we
285
- // hard code the most compact rotations for simplicity. This saves a load
286
- // instruction over the long plt sequences.
287
- const uint32_t pltData[] = {
288
- 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
289
- 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
290
- 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
291
- };
292
317
293
- uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
294
- if (!llvm::isUInt<27>(offset)) {
295
- // We cannot encode the Offset, use the long form.
296
- writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
297
- return;
318
+ if (!config->armThumbPLTs) {
319
+ uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;
320
+
321
+ // The PLT entry is similar to the example given in Appendix A of ELF for
322
+ // the Arm Architecture. Instead of using the Group Relocations to find the
323
+ // optimal rotation for the 8-bit immediate used in the add instructions we
324
+ // hard code the most compact rotations for simplicity. This saves a load
325
+ // instruction over the long plt sequences.
326
+ const uint32_t pltData[] = {
327
+ 0xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 8
328
+ 0xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 8
329
+ 0xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
330
+ };
331
+ if (!llvm::isUInt<27>(offset)) {
332
+ // We cannot encode the Offset, use the long form.
333
+ writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
334
+ return;
335
+ }
336
+ write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
337
+ write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
338
+ write32(buf + 8, pltData[2] | (offset & 0xfff));
339
+ memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
340
+ } else {
341
+ uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
342
+ assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
343
+
344
+ // A PLT entry will be:
345
+ //
346
+ // movw ip, #<lower 16 bits>
347
+ // movt ip, #<upper 16 bits>
348
+ // add ip, pc
349
+ // L1: ldr.w pc, [ip]
350
+ // b L1
351
+ //
352
+ // where ip = r12 = 0xc
353
+
354
+ // movw ip, #<lower 16 bits>
355
+ write16(buf + 2, 0x0c00); // use `ip`
356
+ relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);
357
+
358
+ // movt ip, #<upper 16 bits>
359
+ write16(buf + 6, 0x0c00); // use `ip`
360
+ relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);
361
+
362
+ write16(buf + 8, 0x44fc); // add ip, pc
363
+ write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)
364
+ write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)
365
+ write16(buf + 14, 0xe7fc); // Branch to previous instruction
298
366
}
299
- write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
300
- write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
301
- write32(buf + 8, pltData[2] | (offset & 0xfff));
302
- memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
303
367
}
304
368
305
369
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
306
- addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
307
- addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
370
+ if (config->armThumbPLTs) {
371
+ addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
372
+ } else {
373
+ addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
374
+ addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
375
+ }
308
376
}
309
377
310
378
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
@@ -325,6 +393,8 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
325
393
case R_ARM_JUMP24:
326
394
// Source is ARM, all PLT entries are ARM so no interworking required.
327
395
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
396
+ assert(!config->armThumbPLTs &&
397
+ "If the source is ARM, we should not need Thumb PLTs");
328
398
if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
329
399
return true;
330
400
[[fallthrough]];
@@ -335,9 +405,9 @@ bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
335
405
}
336
406
case R_ARM_THM_JUMP19:
337
407
case R_ARM_THM_JUMP24:
338
- // Source is Thumb, all PLT entries are ARM so interworking is required.
408
+ // Source is Thumb, when all PLT entries are ARM interworking is required.
339
409
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
340
- if (expr == R_PLT_PC || (s.isFunc() && (s.getVA() & 1) == 0))
410
+ if ((expr == R_PLT_PC && !config->armThumbPLTs) || (s.isFunc() && (s.getVA() & 1) == 0))
341
411
return true;
342
412
[[fallthrough]];
343
413
case R_ARM_THM_CALL: {
@@ -547,7 +617,6 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
547
617
// STT_FUNC we choose whether to write a BL or BLX depending on the
548
618
// value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
549
619
// not of type STT_FUNC then we must preserve the original instruction.
550
- // PLT entries are always ARM state so we know we don't need to interwork.
551
620
assert(rel.sym); // R_ARM_CALL is always reached via relocate().
552
621
bool bit0Thumb = val & 1;
553
622
bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;
@@ -606,12 +675,13 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
606
675
// PLT entries are always ARM state so we know we need to interwork.
607
676
assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
608
677
bool bit0Thumb = val & 1;
678
+ bool useThumb = bit0Thumb || config->armThumbPLTs;
609
679
bool isBlx = (read16(loc + 2) & 0x1000) == 0;
610
680
// lld 10.0 and before always used bit0Thumb when deciding to write a BLX
611
- // even when type not STT_FUNC. PLT entries generated by LLD are always ARM.
612
- if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == bit0Thumb )
681
+ // even when type not STT_FUNC.
682
+ if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
613
683
stateChangeWarning(loc, rel.type, *rel.sym);
614
- if (rel.sym->isFunc() || rel.sym->isInPlt() ? !bit0Thumb : isBlx) {
684
+ if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
615
685
// We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
616
686
// the BLX instruction may only be two byte aligned. This must be done
617
687
// before overflow check.
0 commit comments