Skip to content

Commit 112d769

Browse files
committed
[ARM] generate correct code for armv6-m XO big stack operations
The ARM backend codebase is dotted with places where armv6-m will generate constant pools. Now that we can generate execute-only (XO) code for armv6-m, we need to make sure we use the movs/lsls/adds/lsls/adds/lsls/adds pattern instead of constant-pool loads. Big stacks are one of the obvious places. This patch takes care of two sites:
1. big stacks in the prologue/epilogue
2. save/tSTRspi nodes, which implicitly fixes emitThumbRegPlusImmInReg, which is used in several frame lowering functions

Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D154233
1 parent 1538ad9 commit 112d769

File tree

4 files changed

+77
-10
lines changed

4 files changed

+77
-10
lines changed

llvm/lib/Target/ARM/ARMAsmPrinter.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1140,10 +1140,24 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
11401140
case ARM::tLDRpci:
11411141
case ARM::t2MOVi16:
11421142
case ARM::t2MOVTi16:
1143+
case ARM::tMOVi8:
1144+
case ARM::tADDi8:
1145+
case ARM::tLSLri:
11431146
// special cases:
11441147
// 1) for Thumb1 code we sometimes materialize the constant via constpool
11451148
// load.
1146-
// 2) for Thumb2 execute only code we materialize the constant via
1149+
// 2) for Thumb1 execute only code we materialize the constant via the
1150+
// following pattern:
1151+
// movs r3, #:upper8_15:<const>
1152+
// lsls r3, #8
1153+
// adds r3, #:upper0_7:<const>
1154+
// lsls r3, #8
1155+
// adds r3, #:lower8_15:<const>
1156+
// lsls r3, #8
1157+
// adds r3, #:lower0_7:<const>
1158+
// So we need to special-case MOVS, ADDS and LSLS, and keep track of
1159+
// where we are in the sequence with the simplest of state machines.
1160+
// 3) for Thumb2 execute only code we materialize the constant via
11471161
// immediate constants in 2 separate instructions (MOVW/MOVT).
11481162
SrcReg = ~0U;
11491163
DstReg = MI->getOperand(0).getReg();
@@ -1334,6 +1348,23 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
13341348
Offset = MI->getOperand(2).getImm();
13351349
AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
13361350
break;
1351+
case ARM::tMOVi8:
1352+
Offset = MI->getOperand(2).getImm();
1353+
AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
1354+
break;
1355+
case ARM::tLSLri:
1356+
assert(MI->getOperand(3).getImm() == 8 &&
1357+
"The shift amount is not equal to 8");
1358+
assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() &&
1359+
"The source register is not equal to the destination register");
1360+
AFI->EHPrologueOffsetInRegs[DstReg] <<= 8;
1361+
break;
1362+
case ARM::tADDi8:
1363+
assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() &&
1364+
"The source register is not equal to the destination register");
1365+
Offset = MI->getOperand(3).getImm();
1366+
AFI->EHPrologueOffsetInRegs[DstReg] += Offset;
1367+
break;
13371368
case ARM::t2PAC:
13381369
case ARM::t2PACBTI:
13391370
AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE;

llvm/lib/Target/ARM/Thumb1FrameLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
8181
MachineFunction &MF = *MBB.getParent();
8282
const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
8383
if (ST.genExecuteOnly()) {
84-
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg)
85-
.addImm(NumBytes).setMIFlags(MIFlags);
84+
unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
85+
BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg)
86+
.addImm(NumBytes).setMIFlags(MIFlags);
8687
} else {
8788
MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
8889
0, MIFlags);

llvm/lib/Target/ARM/ThumbRegisterInfo.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,10 @@ void ThumbRegisterInfo::emitLoadConstPool(
116116
PredReg, MIFlags);
117117
}
118118

119-
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
120-
/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
121-
/// in a register using mov / mvn sequences or load the immediate from a
119+
/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize a
120+
/// destreg = basereg + immediate in Thumb code. Materialize the immediate in a
121+
/// register using mov / mvn (armv6-M >) sequences, movs / lsls / adds / lsls /
122+
/// adds / lsls / adds sequences (armv6-M) or load the immediate from a
122123
/// constpool entry.
123124
static void emitThumbRegPlusImmInReg(
124125
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -159,7 +160,8 @@ static void emitThumbRegPlusImmInReg(
159160
.addReg(LdReg, RegState::Kill)
160161
.setMIFlags(MIFlags);
161162
} else if (ST.genExecuteOnly()) {
162-
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
163+
unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
164+
BuildMI(MBB, MBBI, dl, TII.get(XOInstr), LdReg)
163165
.addImm(NumBytes).setMIFlags(MIFlags);
164166
} else
165167
MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,

llvm/test/CodeGen/ARM/large-stack.ll

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,54 @@
11
; RUN: llc -mtriple=arm-eabi %s -o /dev/null
2-
; RUN: llc -mtriple=thumbv6m-eabi -mattr=+execute-only %s -o -
2+
; RUN: llc -mtriple=thumbv6m-eabi -mattr=+execute-only %s -o - -filetype=obj | \
3+
; RUN: llvm-objdump -d --no-leading-addr --no-show-raw-insn - | FileCheck %s
34

45
define void @test1() {
5-
%tmp = alloca [ 64 x i32 ] , align 4
6+
; CHECK-LABEL: <test1>:
7+
;; are we using the correct prologue immediate materialization pattern for
8+
;; execute-only?
9+
; CHECK: sub sp, #0x100
10+
%tmp = alloca [ 64 x i32 ] , align 4
611
ret void
712
}
813

914
define void @test2() {
15+
; CHECK-LABEL: <test2>:
16+
;; are we using the correct prologue immediate materialization pattern for
17+
;; execute-only?
18+
; CHECK: movs [[REG:r[0-9]+]], #0xff
19+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
20+
; CHECK-NEXT: adds [[REG]], #0xff
21+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
22+
; CHECK-NEXT: adds [[REG]], #0xef
23+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
24+
; CHECK-NEXT: adds [[REG]], #0xb8
1025
%tmp = alloca [ 4168 x i8 ] , align 4
1126
ret void
1227
}
1328

1429
define i32 @test3() {
30+
;; are we using the correct prologue immediate materialization pattern for
31+
;; execute-only?
32+
; CHECK-LABEL: <test3>:
33+
; CHECK: movs [[REG:r[0-9]+]], #0xcf
34+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
35+
; CHECK-NEXT: adds [[REG]], #0xff
36+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
37+
; CHECK-NEXT: adds [[REG]], #0xff
38+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
39+
; CHECK-NEXT: adds [[REG]], #0xf0
1540
%retval = alloca i32, align 4
1641
%tmp = alloca i32, align 4
17-
%a = alloca [805306369 x i8], align 16
42+
%a = alloca [u0x30000001 x i8], align 16
1843
store i32 0, ptr %tmp
44+
;; are we choosing the correct store/tSTRspi pattern for execute-only?
45+
; CHECK: movs [[REG:r[0-9]+]], #0x30
46+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
47+
; CHECK-NEXT: adds [[REG]], #0x0
48+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
49+
; CHECK-NEXT: adds [[REG]], #0x0
50+
; CHECK-NEXT: lsls [[REG]], [[REG]], #0x8
51+
; CHECK-NEXT: adds [[REG]], #0x8
1952
%tmp1 = load i32, ptr %tmp
2053
ret i32 %tmp1
2154
}

0 commit comments

Comments
 (0)