-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[lld][LoongArch] Relax call36/tail36: R_LARCH_CALL36 #123576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
ea9fea2
95f4540
7b133c2
abc1a45
1b1804e
30cb382
f1f995b
f227ae5
f2aae15
924d511
b9c2ea1
1101829
b3900f6
a624904
03645ef
52bec2b
ff1567b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,10 +53,13 @@ enum Op { | |
ADDI_W = 0x02800000, | ||
ADDI_D = 0x02c00000, | ||
ANDI = 0x03400000, | ||
PCADDI = 0x18000000, | ||
PCADDU12I = 0x1c000000, | ||
LD_W = 0x28800000, | ||
LD_D = 0x28c00000, | ||
JIRL = 0x4c000000, | ||
B = 0x50000000, | ||
BL = 0x54000000, | ||
}; | ||
|
||
enum Reg { | ||
|
@@ -131,6 +134,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { | |
return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; | ||
} | ||
|
||
// Returns the 5-bit field stored in bits [4:0] of v.
static uint32_t getD5(uint64_t v) {
  constexpr uint64_t lowFiveBits = 0x1f;
  return static_cast<uint32_t>(v & lowFiveBits);
}
|
||
// Returns the 5-bit field stored in bits [9:5] of v.
static uint32_t getJ5(uint64_t v) {
  constexpr unsigned fieldShift = 5;
  constexpr uint64_t fiveBitMask = 0x1f;
  return static_cast<uint32_t>((v >> fieldShift) & fiveBitMask);
}
|
||
static uint32_t setD5k16(uint32_t insn, uint32_t imm) { | ||
uint32_t immLo = extractBits(imm, 15, 0); | ||
uint32_t immHi = extractBits(imm, 20, 16); | ||
|
@@ -743,6 +750,119 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, | |
} | ||
} | ||
|
||
// Returns true when the relocation at index i is immediately followed by an
// R_LARCH_RELAX relocation, i.e. entry i + 1 exists and has that type.
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
  const size_t following = i + 1;
  if (following >= relocs.size())
    return false;
  return relocs[following].type == R_LARCH_RELAX;
}
|
||
// Returns true when the relocations at i and i + 2 are each followed by an
// R_LARCH_RELAX marker and the second one applies exactly 4 bytes after the
// first. The relaxable() calls also guarantee that index i + 2 is in bounds
// before it is dereferenced.
static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
  if (!relaxable(relocs, i) || !relaxable(relocs, i + 2))
    return false;
  const auto &first = relocs[i];
  const auto &second = relocs[i + 2];
  return first.offset + 4 == second.offset;
}
|
||
// Relax code sequence.
// From:
//   pcalau12i $a0, %pc_hi20(sym)
//   addi.w/d  $a0, $a0, %pc_lo12(sym)
// To:
//   pcaddi    $a0, %pc_lo12(sym)
//
// From:
//   pcalau12i $a0, %got_pc_hi20(sym_got)
//   ld.w/d    $a0, $a0, %got_pc_lo12(sym_got)
// To:
//   pcaddi    $a0, %got_pc_hi20(sym_got)
//
// Parameters:
//   sec    - section holding the instruction pair; its relaxAux is updated.
//   i      - index whose relaxAux->relocTypes entry describes rHi20; the entry
//            at i + 2 is used for rLo12.
//   loc    - address the pc-relative distance is measured from (supplied by
//            the caller).
//   rHi20  - the R_LARCH_PCALA_HI20 / R_LARCH_GOT_PC_HI20 relocation.
//   rLo12  - the matching R_LARCH_PCALA_LO12 / R_LARCH_GOT_PC_LO12 relocation.
//   remove - set to 4 when the pair is relaxed; left untouched otherwise.
//
// On success relocTypes[i] becomes R_LARCH_RELAX, relocTypes[i + 2] becomes
// R_LARCH_PCREL20_S2, and the replacement pcaddi word is appended to
// sec.relaxAux->writes.
static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
                            uint64_t loc, Relocation &rHi20, Relocation &rLo12,
                            uint32_t &remove) {
  // Only the two hi20/lo12 pairings listed above form a relaxable sequence.
  if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
         rLo12.type == R_LARCH_PCALA_LO12) ||
        (rHi20.type == R_LARCH_GOT_PC_HI20 &&
         rLo12.type == R_LARCH_GOT_PC_LO12)))
    return;

  // GOT references to absolute symbols can't be relaxed to use pcaddi in
  // position-independent code, because these instructions produce a relative
  // address.
  // Also skip undefined, preemptible and STT_GNU_IFUNC symbols, because these
  // symbols may be resolved at runtime.
  if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
      (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
       rHi20.sym->isGnuIFunc() ||
       (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section)))
    return;

  // Pick the address the relaxed instruction must produce: the PLT entry for
  // PLT expressions, otherwise the symbol's own address.
  uint64_t symBase = 0;
  if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
    symBase = rHi20.sym->getPltVA(ctx);
  else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
           rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
    symBase = rHi20.sym->getVA(ctx);
  else {
    // Keep a space before "in" so the symbol name does not run into the
    // function name in the diagnostic.
    Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
             << rHi20.expr << ") against symbol " << rHi20.sym
             << " in relaxPCHi20Lo12";
    return;
  }
  const uint64_t symLocal = symBase + rHi20.addend;

  const int64_t distance = symLocal - loc;
  // Give up unless the distance is 4-byte aligned and fits in the signed
  // 22-bit range accepted for pcaddi here.
  if ((distance & 0x3) != 0 || !isInt<22>(distance))
    return;

  // Note: If we can ensure that the .o files generated by LLVM only contain
  // relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
  // decode instructions. The relaxable instruction sequences imply the
  // following constraints:
  // * For relocation pairs related to got_pc, the opcodes of instructions
  // must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
  // addi.w/d.
  // * The destination register of pcalau12i is guaranteed to be used only by
  // the immediately following instruction.
  const uint32_t currInsn = read32le(sec.content().data() + rHi20.offset);
  const uint32_t nextInsn = read32le(sec.content().data() + rLo12.offset);
  // Both instructions must use one and the same register: the first one's
  // destination must be the second one's source and destination.
  if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
    return;

  sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
  sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
  sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
  // One 4-byte instruction of the pair is dropped.
  remove = 4;
}
|
||
// Relax code sequence. | ||
// From: | ||
// pcaddu18i $ra, %call36(foo) | ||
// jirl $ra, $ra, 0 | ||
// To: | ||
// b/bl foo | ||
// | ||
// On success the type recorded in sec.relaxAux->relocTypes[i] becomes | ||
// R_LARCH_B26, the replacement b/bl opcode word is appended to | ||
// sec.relaxAux->writes, and `remove` is set to 4. Nothing is changed when the | ||
// target is misaligned or out of range, or when bits [4:0] of the following | ||
// instruction are neither R_RA nor R_ZERO. | ||
// `loc` is the address the distance is measured from (supplied by the caller). | ||
static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i, | ||
uint64_t loc, Relocation &r, uint32_t &remove) { | ||
// Target address: the PLT entry when r.expr is R_PLT_PC, otherwise the | ||
// symbol's own address; the addend is applied in both cases. | ||
const uint64_t symLocal = | ||
(r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx)) + | ||
r.addend; | ||
|
||
const int64_t distance = symLocal - loc; | ||
ylzsx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Give up unless the distance is 4-byte aligned and fits in the signed | ||
// 28-bit range checked here for b[l]. | ||
if ((distance & 0x3) != 0 || !isInt<28>(distance)) | ||
return; | ||
|
||
// Read the instruction word located 4 bytes after the relocated one; per the | ||
// sequence above this is expected to be the jirl. Its opcode is not verified. | ||
const uint32_t nextInsn = read32le(sec.content().data() + r.offset + 4); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To match There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this is not necessary. Here are the reasons:
It would be greatly appreciated if you could provide an example of an illegal case or present more substantial reasons. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think it's probably okay to not add more checks, but for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After further consideration and revisiting the manual again(https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc), As for the interoperability you mentioned, Additionally, if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm not objecting to the current revision per se, but rather behavior consistency, no matter whether the input is well-formed or not -- the so-called "bug-for-bug compatibility". This means:
so eventually we're going to have 2 consistent implementations of LoongArch ELF psABI and happy users. What do you think here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We discussed with developers of ld's loongarch port, but they think the check is necessary in ld and lld don't need to align with it. How about adding an assertion here as a compromise? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. I have added an assertion to check jirl. 52bec2b There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for the late reply -- I didn't realize "assertion" is just But now debug builds of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
@MaskRay Would you agree to adding a check for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still prefer that we remove the checks. We should avoid assert, when it is actually reachable. If BFD wants to be guard against malformed input, that's ok. It's fine to diverge from it. |
||
// The field in bits [4:0] of the following instruction selects the | ||
// replacement: R_RA yields bl, R_ZERO yields b. | ||
if (getD5(nextInsn) == R_RA) { | ||
// convert jirl to bl | ||
sec.relaxAux->relocTypes[i] = R_LARCH_B26; | ||
sec.relaxAux->writes.push_back(insn(BL, 0, 0, 0)); | ||
remove = 4; | ||
} else if (getD5(nextInsn) == R_ZERO) { | ||
// convert jirl to b | ||
sec.relaxAux->relocTypes[i] = R_LARCH_B26; | ||
sec.relaxAux->writes.push_back(insn(B, 0, 0, 0)); | ||
remove = 4; | ||
} | ||
} | ||
|
||
static bool relax(Ctx &ctx, InputSection &sec) { | ||
const uint64_t secAddr = sec.getVA(); | ||
const MutableArrayRef<Relocation> relocs = sec.relocs(); | ||
|
@@ -781,6 +901,16 @@ static bool relax(Ctx &ctx, InputSection &sec) { | |
} | ||
break; | ||
} | ||
case R_LARCH_PCALA_HI20: | ||
case R_LARCH_GOT_PC_HI20: | ||
// The overflow check for i+2 will be carried out in isPairRelaxable. | ||
if (isPairRelaxable(relocs, i)) | ||
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove); | ||
break; | ||
case R_LARCH_CALL36: | ||
if (relaxable(relocs, i)) | ||
relaxCall36(ctx, sec, i, loc, r, remove); | ||
break; | ||
} | ||
|
||
// For all anchors whose offsets are <= r.offset, they are preceded by | ||
|
@@ -851,6 +981,7 @@ void LoongArch::finalizeRelax(int passes) const { | |
MutableArrayRef<Relocation> rels = sec->relocs(); | ||
ArrayRef<uint8_t> old = sec->content(); | ||
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1]; | ||
size_t writesIdx = 0; | ||
uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(newSize); | ||
uint64_t offset = 0; | ||
int64_t delta = 0; | ||
|
@@ -867,11 +998,33 @@ void LoongArch::finalizeRelax(int passes) const { | |
continue; | ||
|
||
// Copy from last location to the current relocated location. | ||
const Relocation &r = rels[i]; | ||
Relocation &r = rels[i]; | ||
uint64_t size = r.offset - offset; | ||
memcpy(p, old.data() + offset, size); | ||
p += size; | ||
offset = r.offset + remove; | ||
|
||
int64_t skip = 0; | ||
if (RelType newType = aux.relocTypes[i]) { | ||
switch (newType) { | ||
case R_LARCH_RELAX: | ||
break; | ||
case R_LARCH_PCREL20_S2: | ||
skip = 4; | ||
write32le(p, aux.writes[writesIdx++]); | ||
// RelExpr is needed for relocating. | ||
r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC; | ||
break; | ||
case R_LARCH_B26: | ||
skip = 4; | ||
write32le(p, aux.writes[writesIdx++]); | ||
break; | ||
default: | ||
llvm_unreachable("unsupported type"); | ||
} | ||
} | ||
|
||
p += skip; | ||
offset = r.offset + skip + remove; | ||
} | ||
memcpy(p, old.data() + offset, old.size() - offset); | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.