Skip to content

Commit c2640d5

Browse files
committed
[lld][ELF][LoongArch] Support relaxing R_LARCH_CALL36
Relax eligible PCADDU18I + JIRL sequences to B or BL, depending on JIRL's output (link) register. Correctness is maintained on a best-effort basis by ensuring that the underlying instruction pair is PCADDU18I and JIRL, and that the register operands involved are appropriate. This improves the performance of code compiled with the medium code model, and makes it possible to change the default code model from "small" to "medium" in the future without a runtime performance impact.
1 parent 70b95ca commit c2640d5

File tree

2 files changed

+178
-0
lines changed

2 files changed

+178
-0
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,12 @@ enum Op {
5555
ANDI = 0x03400000,
5656
PCADDI = 0x18000000,
5757
PCADDU12I = 0x1c000000,
58+
PCADDU18I = 0x1e000000,
5859
LD_W = 0x28800000,
5960
LD_D = 0x28c00000,
6061
JIRL = 0x4c000000,
62+
B = 0x50000000,
63+
BL = 0x54000000,
6164
};
6265

6366
enum Reg {
@@ -830,6 +833,45 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
830833
remove = 4;
831834
}
832835

836+
// Returns true if `pair` holds a PCADDU18I + JIRL instruction pair whose
// register operands make it a candidate for relaxation to a single B or BL.
//
// `pair` is two 32-bit instruction words packed into one 64-bit value: the
// first instruction in bits 31..0 and the second in bits 63..32.
//
// All of the following must hold:
//   * the first word matches the PCADDU18I opcode under mask 0xfe000000;
//   * the second word matches the JIRL opcode under mask 0xfc000000;
//   * bits 4..0 of the first word (rd1) equal bits 9..5 of the second (rj2),
//     i.e. JIRL jumps through the register PCADDU18I wrote;
//   * bits 4..0 of the second word (rd2, JIRL's link register) are R_ZERO or
//     R_RA.
static bool isInsnPairCall36(uint64_t pair) {
837+
const uint32_t insn1 = extractBits(pair, 31, 0);
838+
const uint32_t insn2 = extractBits(pair, 63, 32);
839+
// Opcode checks: reject anything that is not PCADDU18I followed by JIRL.
if ((insn1 & 0xfe000000) != PCADDU18I)
840+
return false;
841+
if ((insn2 & 0xfc000000) != JIRL)
842+
return false;
843+
844+
const uint32_t rd1 = extractBits(insn1, 4, 0);
845+
const uint32_t rd2 = extractBits(insn2, 4, 0);
846+
const uint32_t rj2 = extractBits(insn2, 9, 5);
847+
// JIRL must use PCADDU18I's destination register as its base.
if (rd1 != rj2)
848+
return false;
849+
// Only link registers $zero and $ra are accepted; relaxCall maps these to B
// and BL respectively.
if (rd2 != R_ZERO && rd2 != R_RA)
850+
return false;
851+
852+
return true;
853+
}
854+
855+
// Relax R_LARCH_CALL36 pcaddu18i+jirl to b or bl.
//
// Reads the 8 bytes at r.offset in `sec` as a little-endian instruction pair
// and does nothing unless isInsnPairCall36() accepts it. The branch target is
// the symbol's PLT address when r.expr is R_PLT_PC, otherwise the symbol's
// address, plus r.addend. If the distance from `loc` to that target fits in a
// signed 28-bit value and is a multiple of 4, the function:
//   * records R_LARCH_B26 as the new relocation type for relocation `i`;
//   * queues the replacement opcode in relaxAux->writes: B when JIRL's link
//     register is R_ZERO, BL otherwise (R_RA);
//   * sets `remove` to 4.
// Otherwise relaxAux and `remove` are left untouched.
//
// NOTE(review): `loc` is presumably the run-time address of the PCADDU18I and
// `remove` the number of bytes to delete at this relocation -- confirm against
// the caller in relax().
856+
static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
857+
Relocation &r, uint32_t &remove) {
858+
const Symbol &sym = *r.sym;
859+
// NOTE(review): this reads 8 bytes at r.offset without checking that the
// section has that many bytes left -- confirm that malformed input cannot
// place R_LARCH_CALL36 within the last 7 bytes of a section.
const uint64_t insnPair = read64le(sec.content().data() + r.offset);
860+
if (!isInsnPairCall36(insnPair))
861+
return;
862+
863+
// Bits 36..32 of the pair are bits 4..0 of the JIRL word, i.e. its link
// register; $zero means the return address is discarded.
const bool isTail = extractBits(insnPair, 32 + 4, 32 + 0) == R_ZERO;
864+
const uint64_t dest =
865+
(r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
866+
const int64_t displace = dest - loc;
867+
868+
// Relax only when the displacement fits in a signed 28-bit value and is
// 4-byte aligned.
if (isInt<28>(displace) && !(displace & 0x3)) {
869+
sec.relaxAux->relocTypes[i] = R_LARCH_B26;
870+
sec.relaxAux->writes.push_back(isTail ? B : BL);
871+
remove = 4;
872+
}
873+
}
874+
833875
static bool relax(Ctx &ctx, InputSection &sec) {
834876
const uint64_t secAddr = sec.getVA();
835877
const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -874,6 +916,10 @@ static bool relax(Ctx &ctx, InputSection &sec) {
874916
if (isPairRelaxable(relocs, i))
875917
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
876918
break;
919+
case R_LARCH_CALL36:
920+
if (relaxable(relocs, i))
921+
relaxCall(ctx, sec, i, loc, r, remove);
922+
break;
877923
}
878924

879925
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -971,6 +1017,7 @@ void LoongArch::finalizeRelax(int passes) const {
9711017
switch (newType) {
9721018
case R_LARCH_RELAX:
9731019
break;
1020+
case R_LARCH_B26:
9741021
case R_LARCH_PCREL20_S2:
9751022
skip = 4;
9761023
write32le(p, aux.writes[writesIdx++]);

lld/test/ELF/loongarch-relax-call36.s

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
# REQUIRES: loongarch
2+
## Relax R_LARCH_CALL36.
3+
## Currently only loongarch64 is covered, because the call36 pseudo-instruction
4+
## is valid for LA64 only, due to LA32 not having pcaddu18i.
5+
6+
# TODO:
7+
#
8+
# * trivial cases
9+
# * +/- limit: -4, 0, +4
10+
# * align: 0, 1, 2, 3
11+
# * invalid pcaddu18i + jirl pairs
12+
# - rd1 != rj2
13+
# - rd2 not in (0, 1)
14+
15+
# RUN: rm -rf %t && split-file %s %t && cd %t
16+
# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+relax a.s -o a.o
17+
18+
# RUN: ld.lld -T lds a.o -o a
19+
# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s
20+
21+
## Unsure whether this needs a diagnostic. GNU ld allows this.
22+
# RUN: ld.lld -T lds -pie a.o -o a.pie
23+
# RUN: llvm-objdump -d --no-show-raw-insn a.pie | FileCheck %s
24+
25+
# RUN: ld.lld -T lds -pie -z notext -z ifunc-noplt a.o -o a.ifunc-noplt
26+
# RUN: llvm-objdump -d --no-show-raw-insn a.ifunc-noplt | FileCheck %s --check-prefix=CHECK2
27+
28+
# CHECK-LABEL: <_start>:
29+
# CHECK-NEXT: bl -4 <near_before>
30+
# CHECK-NEXT: b -8 <near_before>
31+
# CHECK-NEXT: bl 64 <near_after>
32+
# CHECK-NEXT: b 60 <near_after>
33+
# CHECK-NEXT: pcaddu18i $ra, -512
34+
# CHECK-NEXT: jirl $ra, $ra, -4
35+
# CHECK-NEXT: bl -134217728 <far_b>
36+
# CHECK-NEXT: bl 134217724 <far_y>
37+
# CHECK-NEXT: pcaddu18i $ra, 512
38+
# CHECK-NEXT: jirl $ra, $ra, 0
39+
# CHECK-NEXT: pcaddu18i $t0, 0
40+
# CHECK-NEXT: jirl $t0, $t0, -44
41+
# CHECK-NEXT: pcaddu18i $t0, 0
42+
# CHECK-NEXT: jirl $zero, $t1, 24
43+
# CHECK-NEXT: pcalau12i $t0, 0
44+
# CHECK-NEXT: jirl $zero, $t0, -60
45+
# CHECK-NEXT: pcaddu18i $t0, 0
46+
# CHECK-NEXT: addu16i.d $t0, $t0, 2
47+
# CHECK-EMPTY:
48+
49+
# CHECK-LABEL: <.mid>:
50+
# CHECK-NEXT: b 2048
51+
# CHECK-NEXT: b 2044
52+
# CHECK-EMPTY:
53+
54+
# CHECK2-LABEL: <.mid>:
55+
# CHECK2-NEXT: pcaddu18i $t0, 0
56+
# CHECK2-NEXT: jr $t0
57+
# CHECK2-NEXT: pcaddu18i $t0, 0
58+
# CHECK2-NEXT: jr $t0
59+
# CHECK2-EMPTY:
60+
61+
#--- a.s
62+
.global _start, ifunc
63+
near_before:
64+
ret
65+
66+
_start:
67+
call36 near_before
68+
tail36 $t0, near_before
69+
70+
call36 near_after
71+
tail36 $t0, near_after
72+
73+
call36 far_a ## just out of relaxable range: 0x08000010 - 0x10000014 = -(1 << 27) - 4
74+
call36 far_b ## just in relaxable range: 0x0800001c - 0x1000001c = -(1 << 27)
75+
76+
call36 far_y ## just in relaxable range: 0x1800001c - 0x10000020 = (1 << 27) - 4
77+
call36 far_z ## just out of relaxable range: 0x18000024 - 0x10000024 = 1 << 27
78+
79+
## broken R_LARCH_CALL36 usages should not be relaxed even if relaxable
80+
## otherwise
81+
## correctness is not guaranteed for malformed input like these
82+
83+
## jirl link register (rd) not $zero or $ra (hence not expressible by B or BL)
84+
## the apparent correctness here is only coincidence and should not be relied
85+
## upon
86+
.reloc ., R_LARCH_CALL36, near_before
87+
.reloc ., R_LARCH_RELAX, 0
88+
pcaddu18i $t0, 0
89+
jirl $t0, $t0, 0
90+
91+
## jirl base != pcaddu18i output
92+
.reloc ., R_LARCH_CALL36, near_after
93+
.reloc ., R_LARCH_RELAX, 0
94+
pcaddu18i $t0, 0
95+
jirl $zero, $t1, 0
96+
97+
## 1st insn not pcaddu18i
98+
.reloc ., R_LARCH_CALL36, near_before
99+
.reloc ., R_LARCH_RELAX, 0
100+
pcalau12i $t0, 0
101+
jirl $zero, $t0, 0
102+
103+
## 2nd insn not jirl
104+
.reloc ., R_LARCH_CALL36, near_after
105+
.reloc ., R_LARCH_RELAX, 0
106+
pcaddu18i $t0, 0
107+
addu16i.d $t0, $t0, 0
108+
109+
near_after:
110+
ret
111+
112+
.section .mid,"ax",@progbits
113+
.balign 16
114+
tail36 $t0, ifunc@plt
115+
tail36 $t0, ifunc@plt
116+
117+
.type ifunc, @gnu_indirect_function
118+
ifunc:
119+
ret
120+
121+
#--- lds
122+
SECTIONS {
123+
.text 0x10000000 : { *(.text) }
124+
.mid 0x10000800 : { *(.mid) }
125+
.iplt 0x10001000 : { *(.iplt) }
126+
}
127+
128+
far_a = 0x08000010;
129+
far_b = 0x0800001c;
130+
far_y = 0x1800001c;
131+
far_z = 0x18000024;

0 commit comments

Comments
 (0)