Skip to content

Commit e628563

Browse files
committed
[ARM][Thumb2] Mark BTI-clearing instructions as scheduling region boundaries
Following #68313 this patch extends the idea to M-profile PACBTI. The Machine Scheduler can reorder instructions within a scheduling region depending on the scheduling policy set. If a BTI-clearing instruction happens to partake in one such region, it might be moved around, therefore ending up where it shouldn't. The solution is to mark all BTI-clearing instructions as scheduling region boundaries. This essentially means that they must not be part of any scheduling region, and as consequence never get moved: - PAC - PACBTI - BTI - SG - CALL_BTI (pseudo-instruction for setjmp + bti) Note that PAC isn't BTI-clearing, but it's replaced by PACBTI late in the compilation pipeline. As far as I know, currently it isn't possible to organically obtain code that's susceptible to the bug: - Instructions that write to SP are region boundaries. PAC seems to always be followed by the pushing of r12 to the stack, so essentially PAC is always by itself in a scheduling region. - CALL_BTI is expanded into a machine instruction bundle. Bundles are unpacked only after the last machine scheduler run. Thus setjmp and BTI can be separated only if someone deliberately run the scheduler once more. - The BTI insertion pass is run late in the pipeline, only after the last machine scheduling has run. So once again it can be reordered only if someone deliberately runs the scheduler again. Nevertheless, one can reasonably argue that we should prevent the bug in spite of the compiler not being able to produce the required conditions for it. If things change, the compiler will be robust against this issue. The tests written for this are contrived: bogus MIR instructions have been added adjacent to the BTI-clearing instructions in order to have them inside non-trivial scheduling regions.
1 parent 42b28c6 commit e628563

File tree

4 files changed

+206
-1
lines changed

4 files changed

+206
-1
lines changed

llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2088,7 +2088,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
20882088
if (!MI.isCall() && MI.definesRegister(ARM::SP))
20892089
return true;
20902090

2091-
return false;
2091+
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);
20922092
}
20932093

20942094
bool ARMBaseInstrInfo::

llvm/lib/Target/ARM/Thumb2InstrInfo.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,26 @@ MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI,
286286
return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
287287
}
288288

289+
bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
290+
const MachineBasicBlock *MBB,
291+
const MachineFunction &MF) const {
292+
// BTI clearing instructions shall not take part in scheduling regions as
293+
// they must stay in their intended place. Although PAC isn't BTI clearing,
294+
// it can be transformed into PACBTI after the pre-RA Machine Scheduling
295+
// has taken place, so its movement must also be restricted.
296+
switch (MI.getOpcode()) {
297+
case ARM::t2BTI:
298+
case ARM::t2PAC:
299+
case ARM::t2PACBTI:
300+
case ARM::t2CALL_BTI:
301+
case ARM::t2SG:
302+
return true;
303+
default:
304+
break;
305+
}
306+
return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF);
307+
}
308+
289309
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
290310
MachineBasicBlock::iterator &MBBI,
291311
const DebugLoc &dl, Register DestReg,

llvm/lib/Target/ARM/Thumb2InstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
6868
unsigned OpIdx1,
6969
unsigned OpIdx2) const override;
7070

71+
bool isSchedulingBoundary(const MachineInstr &MI,
72+
const MachineBasicBlock *MBB,
73+
const MachineFunction &MF) const override;
74+
7175
private:
7276
void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override;
7377
};
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s
2+
# RUN: llc -o - -run-pass=postmisched %s | FileCheck %s
3+
4+
--- |
5+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
6+
target triple = "thumbv8.1m.main-arm-none-eabi"
7+
8+
define dso_local i32 @foo_bti(i32 noundef %a) local_unnamed_addr #7 {
9+
entry:
10+
%add = add nsw i32 %a, 1
11+
ret i32 %add
12+
}
13+
14+
define dso_local i32 @foo_pacbti(i32 noundef %a) local_unnamed_addr #7 {
15+
entry:
16+
%add = add nsw i32 %a, 1
17+
ret i32 %add
18+
}
19+
20+
define dso_local noundef i32 @foo_setjmp() local_unnamed_addr #0 {
21+
entry:
22+
%buf = alloca [20 x i64], align 8
23+
call void @llvm.lifetime.start.p0(i64 160, ptr nonnull %buf) #4
24+
%call = call i32 @setjmp(ptr noundef nonnull %buf) #5
25+
%tobool.not = icmp eq i32 %call, 0
26+
br i1 %tobool.not, label %if.else, label %if.then
27+
28+
if.then: ; preds = %entry
29+
call void @longjmp(ptr noundef nonnull %buf, i32 noundef 1) #6
30+
unreachable
31+
32+
if.else: ; preds = %entry
33+
call void @llvm.lifetime.end.p0(i64 160, ptr nonnull %buf) #4
34+
ret i32 0
35+
}
36+
37+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
38+
declare dso_local i32 @setjmp(ptr noundef) local_unnamed_addr #2
39+
declare dso_local void @longjmp(ptr noundef, i32 noundef) local_unnamed_addr #3
40+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
41+
42+
attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
43+
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
44+
attributes #2 = { nounwind returns_twice "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
45+
attributes #3 = { noreturn nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
46+
attributes #4 = { nounwind }
47+
attributes #5 = { nounwind returns_twice }
48+
attributes #6 = { noreturn nounwind }
49+
attributes #7 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+strict-align,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-d32,-dotprod,-fp-armv8,-fp-armv8sp,-fp16fml,-hwdiv-arm,-i8mm,-neon,-pacbti,-sb,-sha2,-vfp3,-vfp3sp,-vfp4,-vfp4sp" }
50+
51+
...
52+
---
53+
name: foo_bti
54+
alignment: 4
55+
tracksRegLiveness: true
56+
tracksDebugUserValues: true
57+
liveins:
58+
- { reg: '$r0' }
59+
frameInfo:
60+
maxAlignment: 1
61+
maxCallFrameSize: 0
62+
machineFunctionInfo:
63+
isLRSpilled: false
64+
body: |
65+
bb.0.entry:
66+
liveins: $r0
67+
68+
t2BTI
69+
renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
70+
tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
71+
72+
...
73+
74+
# CHECK-LABEL: name: foo_bti
75+
# CHECK: body:
76+
# CHECK-NEXT: bb.0.entry:
77+
# CHECK-NEXT: liveins: $r0
78+
# CHECK-NEXT: {{^ +$}}
79+
# CHECK-NEXT: t2BTI
80+
81+
---
82+
name: foo_pacbti
83+
alignment: 4
84+
tracksRegLiveness: true
85+
tracksDebugUserValues: true
86+
liveins:
87+
- { reg: '$r0' }
88+
frameInfo:
89+
stackSize: 12
90+
offsetAdjustment: -4
91+
maxAlignment: 4
92+
maxCallFrameSize: 0
93+
stack:
94+
- { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr' }
95+
- { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r7' }
96+
- { id: 2, type: spill-slot, offset: -12, size: 4, alignment: 4, callee-saved-register: '$r12' }
97+
machineFunctionInfo:
98+
isLRSpilled: true
99+
body: |
100+
bb.0.entry:
101+
liveins: $r0, $lr, $r12
102+
103+
frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp
104+
renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg
105+
$sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr
106+
frame-setup CFI_INSTRUCTION def_cfa_offset 8
107+
frame-setup CFI_INSTRUCTION offset $lr, -4
108+
frame-setup CFI_INSTRUCTION offset $r7, -8
109+
$r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg
110+
frame-setup CFI_INSTRUCTION def_cfa_register $r7
111+
early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg
112+
frame-setup CFI_INSTRUCTION offset $ra_auth_code, -12
113+
renamable $r0 = nsw t2ADDri killed renamable $r0, 1, 14 /* CC::al */, $noreg, $noreg
114+
$r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg
115+
$sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
116+
t2AUT implicit $r12, implicit $lr, implicit $sp
117+
tBX_RET 14 /* CC::al */, $noreg, implicit $r0
118+
119+
...
120+
121+
# CHECK-LABEL: name: foo_pacbti
122+
# CHECK: body:
123+
# CHECK-NEXT: bb.0.entry:
124+
# CHECK-NEXT: liveins: $r0, $lr, $r12
125+
# CHECK-NEXT: {{^ +$}}
126+
# CHECK-NEXT: frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp
127+
128+
---
129+
name: foo_setjmp
130+
alignment: 4
131+
exposesReturnsTwice: true
132+
tracksRegLiveness: true
133+
tracksDebugUserValues: true
134+
frameInfo:
135+
stackSize: 168
136+
offsetAdjustment: -160
137+
maxAlignment: 8
138+
adjustsStack: true
139+
hasCalls: true
140+
maxCallFrameSize: 0
141+
localFrameSize: 160
142+
stack:
143+
- { id: 0, name: buf, offset: -168, size: 160, alignment: 8, local-offset: -160 }
144+
- { id: 1, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '$lr',
145+
callee-saved-restored: false }
146+
- { id: 2, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '$r7' }
147+
machineFunctionInfo:
148+
isLRSpilled: true
149+
body: |
150+
bb.0.entry:
151+
successors: %bb.1
152+
liveins: $lr
153+
154+
frame-setup tPUSH 14 /* CC::al */, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp
155+
frame-setup CFI_INSTRUCTION def_cfa_offset 8
156+
frame-setup CFI_INSTRUCTION offset $lr, -4
157+
frame-setup CFI_INSTRUCTION offset $r7, -8
158+
$r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
159+
frame-setup CFI_INSTRUCTION def_cfa_register $r7
160+
$sp = frame-setup tSUBspi $sp, 40, 14 /* CC::al */, $noreg
161+
renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg
162+
tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0
163+
t2BTI
164+
renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg
165+
tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
166+
t2IT 0, 2, implicit-def $itstate
167+
renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit $itstate
168+
$sp = frame-destroy tADDspi $sp, 40, 0 /* CC::eq */, $cpsr, implicit $itstate
169+
frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $r0, implicit $sp, implicit killed $itstate
170+
171+
bb.1.if.then:
172+
renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg
173+
renamable $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
174+
tBL 14 /* CC::al */, $noreg, @longjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp
175+
176+
...
177+
178+
# CHECK-LABEL: name: foo_setjmp
179+
# CHECK: body:
180+
# CHECK: tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0
181+
# CHECK-NEXT: t2BTI

0 commit comments

Comments
 (0)