Skip to content

Commit ba14908

Browse files
committed
[ARM] Optimise non-ABI frame pointers
With -fomit-frame-pointer, even if we set up a frame pointer for other reasons (e.g. variable-sized or over-aligned stack allocations), we don't need to create an ABI-compliant frame record. This means that we can save all of the general-purpose registers in one push, instead of splitting it to ensure that the frame pointer and link register are adjacent on the stack, saving two instructions per function.
1 parent 9596ae7 commit ba14908

14 files changed

+523
-333
lines changed

llvm/lib/Target/ARM/ARMFrameLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2964,6 +2964,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
29642964
// on the stack.
29652965
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
29662966
break;
2967+
case ARMSubtarget::NoSplit:
2968+
assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
2969+
"ABI-required frame pointers need a CSR split when signing return "
2970+
"address.");
2971+
CSI.insert(find_if(CSI,
2972+
[=](const auto &CS) {
2973+
Register Reg = CS.getReg();
2974+
return Reg != ARM::LR;
2975+
}),
2976+
CalleeSavedInfo(ARM::R12));
2977+
break;
29672978
default:
29682979
llvm_unreachable("Unexpected CSR split with return address signing");
29692980
}

llvm/lib/Target/ARM/ARMSubtarget.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
492492
const std::vector<CalleeSavedInfo> CSI =
493493
MF.getFrameInfo().getCalleeSavedInfo();
494494

495-
// Returns SplitR7 if the frame setup must be split into two separate pushes
496-
// of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
497-
// always required on Thumb1-only targets, as the push and pop instructions
498-
// can't access the high registers. This is also required when R7 is the frame
499-
// pointer and frame pointer elimination is disabled, or branch signing is
500-
// enabled and AAPCS is disabled.
501-
if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
502-
!createAAPCSFrameChain()) ||
503-
(getFramePointerReg() == ARM::R7 &&
504-
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
505-
isThumb1Only())
495+
// Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
496+
// cannot use high registers except for lr.
497+
if (isThumb1Only())
498+
return SplitR7;
499+
500+
// If R7 is the frame pointer, we must split at R7 to ensure that the
501+
// previous frame pointer (R7) and return address (LR) are adjacent on the
502+
// stack, to form a valid frame record.
503+
if (getFramePointerReg() == ARM::R7 &&
504+
MF.getTarget().Options.DisableFramePointerElim(MF))
506505
return SplitR7;
507506

508507
// Returns SplitR11WindowsSEH when the stack pointer needs to be
@@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
515514
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
516515
return SplitR11WindowsSEH;
517516

518-
// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
519-
// other in the list of callee saved registers in a frame, and branch
520-
// signing is enabled.
517+
// Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
518+
// and LR to be adjacent on the stack, and branch signing is enabled,
519+
// requiring R12 to be on the stack.
521520
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
522-
getFramePointerReg() == ARM::R11)
521+
getFramePointerReg() == ARM::R11 &&
522+
MF.getTarget().Options.DisableFramePointerElim(MF))
523523
return SplitR11AAPCSSignRA;
524524
return NoSplit;
525525
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
9595
/// push {r0-r7, lr}
9696
/// push {r8-r12}
9797
/// vpush {d8-d15}
98+
/// Note that Thumb1 changes this layout when the frame pointer is R11,
99+
/// using a longer sequence of instructions because R11 can't be used by a
100+
/// Thumb1 push instruction. This doesn't currently have a separate enum
101+
/// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
98102
SplitR7,
99103

100104
/// When the stack frame size is not known (because of variable-sized

llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,24 +11,20 @@ define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-a
1111
; CHECK-NEXT: .cfi_startproc
1212
; CHECK-NEXT: @ %bb.0: @ %entry
1313
; CHECK-NEXT: pacbti r12, lr, sp
14-
; CHECK-NEXT: .save {r7, lr}
15-
; CHECK-NEXT: push {r7, lr}
16-
; CHECK-NEXT: .cfi_def_cfa_offset 8
17-
; CHECK-NEXT: .cfi_offset lr, -4
18-
; CHECK-NEXT: .cfi_offset r7, -8
19-
; CHECK-NEXT: .save {ra_auth_code}
20-
; CHECK-NEXT: str r12, [sp, #-4]!
14+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
15+
; CHECK-NEXT: push.w {r7, r12, lr}
2116
; CHECK-NEXT: .cfi_def_cfa_offset 12
22-
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
17+
; CHECK-NEXT: .cfi_offset lr, -4
18+
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
19+
; CHECK-NEXT: .cfi_offset r7, -12
2320
; CHECK-NEXT: .pad #4
2421
; CHECK-NEXT: sub sp, #4
2522
; CHECK-NEXT: .cfi_def_cfa_offset 16
2623
; CHECK-NEXT: adds r0, #1
2724
; CHECK-NEXT: bl _Z1gi
2825
; CHECK-NEXT: subs r0, #1
2926
; CHECK-NEXT: add sp, #4
30-
; CHECK-NEXT: ldr r12, [sp], #4
31-
; CHECK-NEXT: pop.w {r7, lr}
27+
; CHECK-NEXT: pop.w {r7, r12, lr}
3228
; CHECK-NEXT: aut r12, lr, sp
3329
; CHECK-NEXT: bx lr
3430
entry:
@@ -42,6 +38,8 @@ declare dso_local i32 @_Z1gi(i32)
4238

4339
; UNWIND-LABEL: Opcodes [
4440
; UNWIND-NEXT: 0x00 ; vsp = vsp + 4
41+
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
4542
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
46-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
47-
; UNWIND-NEXT: 0xB0 ; finish
43+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
44+
45+

llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll

Lines changed: 30 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -22,24 +22,20 @@ define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf
2222
; CHECK-NEXT: .cfi_startproc
2323
; CHECK-NEXT: @ %bb.0: @ %entry
2424
; CHECK-NEXT: pac r12, lr, sp
25-
; CHECK-NEXT: .save {r7, lr}
26-
; CHECK-NEXT: push {r7, lr}
27-
; CHECK-NEXT: .cfi_def_cfa_offset 8
28-
; CHECK-NEXT: .cfi_offset lr, -4
29-
; CHECK-NEXT: .cfi_offset r7, -8
30-
; CHECK-NEXT: .save {ra_auth_code}
31-
; CHECK-NEXT: str r12, [sp, #-4]!
25+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
26+
; CHECK-NEXT: push.w {r7, r12, lr}
3227
; CHECK-NEXT: .cfi_def_cfa_offset 12
33-
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
28+
; CHECK-NEXT: .cfi_offset lr, -4
29+
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
30+
; CHECK-NEXT: .cfi_offset r7, -12
3431
; CHECK-NEXT: .pad #4
3532
; CHECK-NEXT: sub sp, #4
3633
; CHECK-NEXT: .cfi_def_cfa_offset 16
3734
; CHECK-NEXT: subs r0, #1
3835
; CHECK-NEXT: bl g
3936
; CHECK-NEXT: adds r0, #1
4037
; CHECK-NEXT: add sp, #4
41-
; CHECK-NEXT: ldr r12, [sp], #4
42-
; CHECK-NEXT: pop.w {r7, lr}
38+
; CHECK-NEXT: pop.w {r7, r12, lr}
4339
; CHECK-NEXT: aut r12, lr, sp
4440
; CHECK-NEXT: bx lr
4541
entry:
@@ -56,20 +52,16 @@ define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
5652
; CHECK-NEXT: pac r12, lr, sp
5753
; CHECK-NEXT: vstr fpcxtns, [sp, #-4]!
5854
; CHECK-NEXT: .cfi_def_cfa_offset 4
59-
; CHECK-NEXT: .save {r7, lr}
60-
; CHECK-NEXT: push {r7, lr}
61-
; CHECK-NEXT: .cfi_def_cfa_offset 12
62-
; CHECK-NEXT: .cfi_offset lr, -8
63-
; CHECK-NEXT: .cfi_offset r7, -12
64-
; CHECK-NEXT: .save {ra_auth_code}
65-
; CHECK-NEXT: str r12, [sp, #-4]!
55+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
56+
; CHECK-NEXT: push.w {r7, r12, lr}
6657
; CHECK-NEXT: .cfi_def_cfa_offset 16
67-
; CHECK-NEXT: .cfi_offset ra_auth_code, -16
58+
; CHECK-NEXT: .cfi_offset lr, -8
59+
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
60+
; CHECK-NEXT: .cfi_offset r7, -16
6861
; CHECK-NEXT: subs r0, #1
6962
; CHECK-NEXT: bl g
7063
; CHECK-NEXT: adds r0, #1
71-
; CHECK-NEXT: ldr r12, [sp], #4
72-
; CHECK-NEXT: pop.w {r7, lr}
64+
; CHECK-NEXT: pop.w {r7, r12, lr}
7365
; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
7466
; CHECK-NEXT: vldr fpcxtns, [sp], #4
7567
; CHECK-NEXT: aut r12, lr, sp
@@ -87,24 +79,20 @@ define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
8779
; CHECK: .cfi_startproc
8880
; CHECK-NEXT: @ %bb.0: @ %entry
8981
; CHECK-NEXT: pac r12, lr, sp
90-
; CHECK-NEXT: .save {r7, lr}
91-
; CHECK-NEXT: push {r7, lr}
92-
; CHECK-NEXT: .cfi_def_cfa_offset 8
93-
; CHECK-NEXT: .cfi_offset lr, -4
94-
; CHECK-NEXT: .cfi_offset r7, -8
95-
; CHECK-NEXT: .save {ra_auth_code}
96-
; CHECK-NEXT: str r12, [sp, #-4]!
82+
; CHECK-NEXT: .save {r7, ra_auth_code, lr}
83+
; CHECK-NEXT: push.w {r7, r12, lr}
9784
; CHECK-NEXT: .cfi_def_cfa_offset 12
98-
; CHECK-NEXT: .cfi_offset ra_auth_code, -12
85+
; CHECK-NEXT: .cfi_offset lr, -4
86+
; CHECK-NEXT: .cfi_offset ra_auth_code, -8
87+
; CHECK-NEXT: .cfi_offset r7, -12
9988
; CHECK-NEXT: .pad #4
10089
; CHECK-NEXT: sub sp, #4
10190
; CHECK-NEXT: .cfi_def_cfa_offset 16
10291
; CHECK-NEXT: subs r0, #1
10392
; CHECK-NEXT: bl g
10493
; CHECK-NEXT: adds r0, #1
10594
; CHECK-NEXT: add sp, #4
106-
; CHECK-NEXT: ldr r12, [sp], #4
107-
; CHECK-NEXT: pop.w {r7, lr}
95+
; CHECK-NEXT: pop.w {r7, r12, lr}
10896
; CHECK-NEXT: aut r12, lr, sp
10997
; CHECK-NEXT: mrs r12, control
11098
; CHECK-NEXT: tst.w r12, #8
@@ -149,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe
149137

150138
; UNWIND-LABEL: FunctionAddress: 0x0
151139
; UNWIND: 0x00 ; vsp = vsp + 4
140+
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
152141
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
153-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
154-
; UNWIND-NEXT: 0xB0 ; finish
155-
; UNWIND-NEXT: 0xB0 ; finish
142+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
143+
156144

157-
; UNWIND-LABEL: FunctionAddress: 0x24
158-
; UNWIND: 0xB4 ; pop ra_auth_code
159-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
145+
; UNWIND-LABEL: FunctionAddress: 0x1E
146+
; UNWIND: 0x80 0x08 ; pop {r7}
147+
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
148+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
160149

161-
; UNWIND-LABEL: FunctionAddress: 0x54
150+
; UNWIND-LABEL: FunctionAddress: 0x48
162151
; UNWIND: 0x00 ; vsp = vsp + 4
152+
; UNWIND-NEXT: 0x80 0x08 ; pop {r7}
163153
; UNWIND-NEXT: 0xB4 ; pop ra_auth_code
164-
; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr}
165-
; UNWIND-NEXT: 0xB0 ; finish
166-
; UNWIND-NEXT: 0xB0 ; finish
154+
; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
167155

168156
; UNWIND-LABEL: 00000001 {{.*}} f0
169-
; UNWIND-LABEL: 00000025 {{.*}} f1
170-
; UNWIND-LABEL: 00000055 {{.*}} f2
157+
; UNWIND-LABEL: 0000001f {{.*}} f1
158+
; UNWIND-LABEL: 00000049 {{.*}} f2

0 commit comments

Comments
 (0)