-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[BOLT][AArch64]support inline-small-functions
for AArch64
#120187
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
inline-small-functions
for AArch64inline-small-functions
for AArch64
@llvm/pr-subscribers-bolt Author: Nicholas (liusy58) ChangesAdd some functions in Full diff: https://github.com/llvm/llvm-project/pull/120187.diff 3 Files Affected:
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index f004a8eeea185b..1793f4ff1f1480 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -310,13 +310,13 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
if (MIB.isPseudo(Inst))
continue;
- MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86());
+ MIB.stripAnnotations(Inst, /*KeepTC=*/BC.isX86() || BC.isAArch64());
// Fix branch target. Strictly speaking, we don't have to do this as
// targets of direct branches will be fixed later and don't matter
// in the CFG state. However, disassembly may look misleading, and
// hence we do the fixing.
- if (MIB.isBranch(Inst)) {
+ if (MIB.isBranch(Inst) && !MIB.isTailCall(Inst)) {
assert(!MIB.isIndirectBranch(Inst) &&
"unexpected indirect branch in callee");
const BinaryBasicBlock *TargetBB =
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 7e08e5c81d26ff..0722b8ae0cb2c9 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -133,6 +133,34 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
public:
using MCPlusBuilder::MCPlusBuilder;
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
+ bool isPush(const MCInst &Inst) const override { return false; }
+
+ bool isPop(const MCInst &Inst) const override { return false; }
+
+ void createCall(MCInst &Inst, const MCSymbol *Target,
+ MCContext *Ctx) override {
+ createDirectCall(Inst, Target, Ctx, false);
+ }
+
+ bool convertTailCallToCall(MCInst &Inst) override {
+ int NewOpcode;
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case AArch64::B:
+ NewOpcode = AArch64::BL;
+ break;
+ case AArch64::BR:
+ NewOpcode = AArch64::BLR;
+ break;
+ }
+ Inst.setOpcode(NewOpcode);
+ removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
+ return true;
+ }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
diff --git a/bolt/test/AArch64/inline-test.s b/bolt/test/AArch64/inline-test.s
new file mode 100644
index 00000000000000..ec33f735163899
--- /dev/null
+++ b/bolt/test/AArch64/inline-test.s
@@ -0,0 +1,57 @@
+# This test checks that inline is properly handled by BOLT on aarch64.
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-linux-gnu %s -o %t.o
+# RUN: %clang --target=aarch64-unknown-linux %t.o -o %t.exe -Wl,-q
+# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A -debug-only=bolt-inliner %t.exe -o %t.bolt | FileCheck %s
+
+# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes.
+# CHECK: Binary Function "_Z3barP1A" after inlining {
+# CHECK-NOT: bl _Z3fooP1A
+# CHECK: ldr x8, [x0]
+# CHECK-NEXT: ldr w0, [x8]
+
+ .text
+ .globl _Z3fooP1A // -- Begin function _Z3fooP1A
+ .p2align 2
+ .type _Z3fooP1A,@function
+_Z3fooP1A: // @_Z3fooP1A
+ .cfi_startproc
+ ldr x8, [x0]
+ ldr w0, [x8]
+ ret
+.Lfunc_end0:
+ .size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A
+ .cfi_endproc
+ .globl _Z3barP1A // -- Begin function _Z3barP1A
+ .p2align 2
+ .type _Z3barP1A,@function
+_Z3barP1A: // @_Z3barP1A
+ .cfi_startproc
+ stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+ .cfi_def_cfa_offset 16
+ mov x29, sp
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ bl _Z3fooP1A
+ mul w0, w0, w0
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+.Lfunc_end1:
+ .size _Z3barP1A, .Lfunc_end1-_Z3barP1A
+ .cfi_endproc
+ .globl main // -- Begin function main
+ .p2align 2
+ .type main,@function
+main: // @main
+ .cfi_startproc
+ mov w0, wzr
+ ret
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
+ .cfi_endproc
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
\ No newline at end of file
|
efce59a
to
d8acd28
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good overall, but please address comments/suggestions
# RUN: llvm-bolt --inline-small-functions --print-inline --print-only=_Z3barP1A \ | ||
# RUN: %t.exe -o %t.bolt | FileCheck %s | ||
|
||
# CHECK: BOLT-INFO: inlined 0 calls at 1 call sites in 2 iteration(s). Change in binary size: 4 bytes. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why does it report inlined 0 calls
but inlining is actually in effect?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"inlined 0 calls" here due to lack of fdata file
.Lfunc_end0: | ||
.size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.Lfunc_end0: | |
.size _Z3fooP1A, .Lfunc_end0-_Z3fooP1A | |
.size _Z3fooP1A, .-_Z3fooP1A |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fix it now.
.section ".note.GNU-stack","",@progbits | ||
.addrsig |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.section ".note.GNU-stack","",@progbits | |
.addrsig |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fix it now
@@ -0,0 +1,48 @@ | |||
## This test checks that inline is properly handled by BOLT on aarch64. | |||
|
|||
# REQUIRES: system-linux, asserts |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it really needs asserts?
d8acd28
to
766cd23
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
3b6eabf
to
3bb6893
Compare
3bb6893
to
aa2bb22
Compare
This functionality is needed for inliner pass and also for correct dyno stats. Needed for [PR](#120187)
This functionality is needed for inliner pass and also for correct dyno stats. Needed for [PR](llvm/llvm-project#120187)
Add some functions in
AArch64MCPlusBuilder.cpp
to support inline for AArch64.