Skip to content

Commit ee42822

Browse files
authored
[BOLT][AArch64]support inline-small-functions for AArch64 (#120187)
Add some functions in `AArch64MCPlusBuilder.cpp` to support inlining on AArch64.
1 parent fbb9d49 commit ee42822

File tree

4 files changed

+122
-2
lines changed

4 files changed

+122
-2
lines changed

bolt/lib/Passes/Inliner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
310310
if (MIB.isPseudo(Inst))
311311
continue;
312312

313-
MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
313+
MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());
314314

315315
// Fix branch target. Strictly speaking, we don't have to do this as
316316
// targets of direct branches will be fixed later and don't matter
317317
// in the CFG state. However, disassembly may look misleading, and
318318
// hence we do the fixing.
319-
if (MIB.isBranch(Inst)) {
319+
if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
320320
assert(!MIB.isIndirectBranch(Inst) &&
321321
"unexpected indirect branch in callee");
322322
const BinaryBasicBlock *TargetBB =

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,36 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
133133
public:
134134
using MCPlusBuilder::MCPlusBuilder;
135135

136+
MCPhysReg getStackPointer() const override { return AArch64::SP; }
137+
138+
bool isPush(const MCInst &Inst) const override { return false; }
139+
140+
bool isPop(const MCInst &Inst) const override { return false; }
141+
142+
void createCall(MCInst &Inst, const MCSymbol *Target,
143+
MCContext *Ctx) override {
144+
createDirectCall(Inst, Target, Ctx, false);
145+
}
146+
147+
bool convertTailCallToCall(MCInst &Inst) override {
148+
int NewOpcode;
149+
switch (Inst.getOpcode()) {
150+
default:
151+
return false;
152+
case AArch64::B:
153+
NewOpcode = AArch64::BL;
154+
break;
155+
case AArch64::BR:
156+
NewOpcode = AArch64::BLR;
157+
break;
158+
}
159+
160+
Inst.setOpcode(NewOpcode);
161+
removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
162+
clearOffset(Inst);
163+
return true;
164+
}
165+
136166
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
137167
CompFuncTy Comp) const override {
138168
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
## This test checks that BOLT inlines a small function that ends in a plain
## return into its caller on AArch64, removing the direct call (bl).
2+
3+
# REQUIRES: system-linux
4+
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
6+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
7+
# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A \
8+
# RUN: %t.exe -o %t.bolt | FileCheck %s
9+
10+
# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
11+
# CHECK: Binary Function "_Z3barP1A" after inlining {
12+
# CHECK-NOT: bl _Z3fooP1A
13+
# CHECK: ldr x8, [x0]
14+
# CHECK-NEXT: ldr w0, [x8]
15+
16+
.text
17+
.globl _Z3fooP1A
18+
.type _Z3fooP1A,@function
19+
_Z3fooP1A:
20+
ldr x8, [x0]
21+
ldr w0, [x8]
22+
ret
23+
.size _Z3fooP1A, .-_Z3fooP1A
24+
25+
.globl _Z3barP1A
26+
.type _Z3barP1A,@function
27+
_Z3barP1A:
28+
stp x29, x30, [sp, #-16]!
29+
mov x29, sp
30+
bl _Z3fooP1A
31+
mul w0, w0, w0
32+
ldp x29, x30, [sp], #16
33+
ret
34+
.size _Z3barP1A, .-_Z3barP1A
35+
36+
.globl main
37+
.p2align 2
38+
.type main,@function
39+
main:
40+
mov w0, wzr
41+
ret
42+
.size main, .-main
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
## This test checks that BOLT inlines a small function that ends in an indirect
## tail call (br) on AArch64, turning the tail call into a regular call (blr).
2+
3+
# REQUIRES: system-linux
4+
5+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
6+
# RUN: %clang %cflags -O0 %t.o -o %t.exe -Wl,-q
7+
# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=test \
8+
# RUN: %t.exe -o %t.bolt | FileCheck %s
9+
10+
#CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
11+
#CHECK: Binary Function "test" after inlining {
12+
#CHECK-NOT: bl indirect
13+
#CHECK: add w0, w1, w0
14+
#CHECK-NEXT: blr x2
15+
16+
.text
17+
.globl indirect
18+
.type indirect,@function
19+
indirect:
20+
add w0, w1, w0
21+
br x2
22+
.size indirect, .-indirect
23+
24+
.globl test
25+
.type test,@function
26+
test:
27+
stp x29, x30, [sp, #-32]!
28+
stp x20, x19, [sp, #16]
29+
mov x29, sp
30+
mov w19, w1
31+
mov w20, w0
32+
bl indirect
33+
add w8, w19, w20
34+
cmp w0, #0
35+
csinc w0, w8, wzr, eq
36+
ldp x20, x19, [sp, #16]
37+
ldp x29, x30, [sp], #32
38+
ret
39+
.size test, .-test
40+
41+
.globl main
42+
.type main,@function
43+
main:
44+
mov w0, wzr
45+
ret
46+
.size main, .-main
47+
48+

0 commit comments

Comments
 (0)