Skip to content

[lld][ELF][LoongArch] Support relaxing R_LARCH_CALL36 #127312

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,12 @@ enum Op {
ANDI = 0x03400000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
PCADDU18I = 0x1e000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
JIRL = 0x4c000000,
B = 0x50000000,
BL = 0x54000000,
};

enum Reg {
Expand Down Expand Up @@ -830,6 +833,45 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
remove = 4;
}

static bool isInsnPairCall36(uint64_t pair) {
const uint32_t insn1 = extractBits(pair, 31, 0);
const uint32_t insn2 = extractBits(pair, 63, 32);
if ((insn1 & 0xfe000000) != PCADDU18I)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an instruction that #123576 doesn't have. We typically avoid instruction checks unless absolutely necessary.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ld.bfd checks for jirl but not pcaddu18i, nor does ld.bfd ensure jirl.rj == pcaddu18i.rd. Granted, these are irrelevant for well-formed inputs, but I think such checks should get added to ld.bfd (or indeed, the psABI) as well, for the guarantee that "relaxation does not affect code semantics" to uphold.

return false;
if ((insn2 & 0xfc000000) != JIRL)
return false;

const uint32_t rd1 = extractBits(insn1, 4, 0);
const uint32_t rd2 = extractBits(insn2, 4, 0);
const uint32_t rj2 = extractBits(insn2, 9, 5);
if (rd1 != rj2)
return false;
if (rd2 != R_ZERO && rd2 != R_RA)
return false;

return true;
}

// Relax R_LARCH_CALL36 pcaddu18i+jirl to b or bl.
static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
const Symbol &sym = *r.sym;
const uint64_t insnPair = read64le(sec.content().data() + r.offset);
if (!isInsnPairCall36(insnPair))
return;

const bool isTail = extractBits(insnPair, 32 + 4, 32 + 0) == R_ZERO;
const uint64_t dest =
(r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
const int64_t displace = dest - loc;

if (isInt<28>(displace) && !(displace & 0x3)) {
sec.relaxAux->relocTypes[i] = R_LARCH_B26;
sec.relaxAux->writes.push_back(isTail ? B : BL);
remove = 4;
}
}

static bool relax(Ctx &ctx, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
Expand Down Expand Up @@ -874,6 +916,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
if (isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
break;
case R_LARCH_CALL36:
if (relaxable(relocs, i))
relaxCall(ctx, sec, i, loc, r, remove);
break;
}

// For all anchors whose offsets are <= r.offset, they are preceded by
Expand Down Expand Up @@ -971,6 +1017,7 @@ void LoongArch::finalizeRelax(int passes) const {
switch (newType) {
case R_LARCH_RELAX:
break;
case R_LARCH_B26:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#123576 does not change r.expr. Which one is appropriate?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, it should remain R_PLT_PC. I'll see if I should just close this instead of #123576 because I didn't know that PR even existed until today morning.

case R_LARCH_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
Expand Down
131 changes: 131 additions & 0 deletions lld/test/ELF/loongarch-relax-call36.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# REQUIRES: loongarch
## Relax R_LARCH_CALL36.
## Currently only loongarch64 is covered, because the call36 pseudo-instruction
## is valid for LA64 only, due to LA32 not having pcaddu18i.

# TODO:
#
# * trivial cases
# * +/- limit: -4, 0, +4
# * align: 0, 1, 2, 3
# * invalid pcaddu18i + jirl pairs
# - rd1 != rj2
# - rd2 not in (0, 1)

# RUN: rm -rf %t && split-file %s %t && cd %t
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o

# RUN: ld.lld -T lds a.o -o a
# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s

## Unsure whether this needs a diagnostic. GNU ld allows this.
# RUN: ld.lld -T lds -pie a.o -o a.pie
# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s

# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt
# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefix=CHECK2

# CHECK-LABEL: <_start>:
# CHECK-NEXT: bl -4 <near_before>
# CHECK-NEXT: b -8 <near_before>
# CHECK-NEXT: bl 64 <near_after>
# CHECK-NEXT: b 60 <near_after>
# CHECK-NEXT: pcaddu18i $ra, -512
# CHECK-NEXT: jirl $ra, $ra, -4
# CHECK-NEXT: bl -134217728 <far_b>
# CHECK-NEXT: bl 134217724 <far_y>
# CHECK-NEXT: pcaddu18i $ra, 512
# CHECK-NEXT: jirl $ra, $ra, 0
# CHECK-NEXT: pcaddu18i $t0, 0
# CHECK-NEXT: jirl $t0, $t0, -44
# CHECK-NEXT: pcaddu18i $t0, 0
# CHECK-NEXT: jirl $zero, $t1, 24
# CHECK-NEXT: pcalau12i $t0, 0
# CHECK-NEXT: jirl $zero, $t0, -60
# CHECK-NEXT: pcaddu18i $t0, 0
# CHECK-NEXT: addu16i.d $t0, $t0, 2
# CHECK-EMPTY:

# CHECK-LABEL: <.mid>:
# CHECK-NEXT: b 2048
# CHECK-NEXT: b 2044
# CHECK-EMPTY:

# CHECK2-LABEL: <.mid>:
# CHECK2-NEXT: pcaddu18i $t0, 0
# CHECK2-NEXT: jr $t0
# CHECK2-NEXT: pcaddu18i $t0, 0
# CHECK2-NEXT: jr $t0
# CHECK2-EMPTY:

#--- a.s
.global _start, ifunc
near_before:
ret

_start:
call36 near_before
tail36 $t0, near_before

call36 near_after
tail36 $t0, near_after

call36 far_a ## just out of relaxable range: 0x08000010 - 0x10000014 = -(1 << 27) - 4
call36 far_b ## just in relaxable range: 0x0800001c - 0x1000001c = -(1 << 27)

call36 far_y ## just in relaxable range: 0x1800001c - 0x10000020 = (1 << 27) - 4
call36 far_z ## just out of relaxable range: 0x18000024 - 0x10000024 = 1 << 27

## broken R_LARCH_CALL36 usages should not be relaxed even if relaxable
## otherwise
## correctness is not guaranteed for malformed input like these

## jirl link register (rd) not $zero or $ra (hence not expressible by B or BL)
## the apparent correctness here is only coincidence and should not be relied
## upon
.reloc ., R_LARCH_CALL36, near_before
.reloc ., R_LARCH_RELAX, 0
pcaddu18i $t0, 0
jirl $t0, $t0, 0

## jirl base != pcaddu18i output
.reloc ., R_LARCH_CALL36, near_after
.reloc ., R_LARCH_RELAX, 0
pcaddu18i $t0, 0
jirl $zero, $t1, 0

## 1st insn not pcaddu18i
.reloc ., R_LARCH_CALL36, near_before
.reloc ., R_LARCH_RELAX, 0
pcalau12i $t0, 0
jirl $zero, $t0, 0

## 2nd insn not jirl
.reloc ., R_LARCH_CALL36, near_after
.reloc ., R_LARCH_RELAX, 0
pcaddu18i $t0, 0
addu16i.d $t0, $t0, 0

near_after:
ret

.section .mid,"ax",@progbits
.balign 16
tail36 $t0, ifunc@plt
tail36 $t0, ifunc@plt

.type ifunc, @gnu_indirect_function
ifunc:
ret

#--- lds
SECTIONS {
.text 0x10000000 : { *(.text) }
.mid 0x10000800 : { *(.mid) }
.iplt 0x10001000 : { *(.iplt) }
}

far_a = 0x08000010;
far_b = 0x0800001c;
far_y = 0x1800001c;
far_z = 0x18000024;