Skip to content

Commit 3b16630

Browse files
authored
[MachineOutliner] Sort by Benefit to Cost Ratio (#90264)
This PR depends on #90260.

We changed the order in which functions are outlined in Machine Outliner. The formula for priority is found via a black-box Bayesian optimization toolbox. Using this formula for sorting consistently reduces the uncompressed size of large real-world mobile apps. We also ran a few benchmarks using the LLVM test suite, which showed that sorting by priority consistently reduces the text segment size.

|run (CTMark/)   |baseline (1)|priority (2)|diff (1 -> 2)|
|----------------|------------|------------|-------------|
|lencod          |349624      |349264      |-0.1030%     |
|SPASS           |219672      |219480      |-0.0874%     |
|kc              |271956      |251200      |-7.6321%     |
|sqlite3         |223920      |223708      |-0.0947%     |
|7zip-benchmark  |405364      |402624      |-0.6759%     |
|bullet          |139820      |139500      |-0.2289%     |
|consumer-typeset|295684      |290196      |-1.8560%     |
|pairlocalalign  |72236       |72092       |-0.1993%     |
|tramp3d-v4      |189572      |189292      |-0.1477%     |

This is part of an enhanced version of machine outliner -- see [RFC](https://discourse.llvm.org/t/rfc-enhanced-machine-outliner-part-1-fulllto-part-2-thinlto-nolto-to-come/78732).
1 parent 2afea72 commit 3b16630

7 files changed

+400
-151
lines changed

llvm/lib/CodeGen/MachineOutliner.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -828,10 +828,12 @@ bool MachineOutliner::outline(Module &M,
828828
<< "\n");
829829
bool OutlinedSomething = false;
830830

831-
// Sort by benefit. The most beneficial functions should be outlined first.
831+
// Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
832+
// The function with highest priority should be outlined first.
832833
stable_sort(FunctionList,
833834
[](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
834-
return LHS.getBenefit() > RHS.getBenefit();
835+
return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
836+
RHS.getNotOutlinedCost() * LHS.getOutliningCost();
835837
});
836838

837839
// Walk over each function, outlining them as we go along. Functions are
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; This tests the order in which functions are outlined in MachineOutliner
2+
; There are TWO key OutlinedFunctions in FunctionList
3+
;
4+
; ===================== First One =====================
5+
; ```
6+
; mov w0, #1
7+
; mov w1, #2
8+
; mov w2, #3
9+
; mov w3, #4
10+
; mov w4, #5
11+
; ```
12+
; It has:
13+
; - `SequenceSize=20` and `OccurrenceCount=6`
14+
; - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
15+
; - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
16+
; - `Benefit=120-96=24` and `Priority=120/96=1.25`
17+
;
18+
; ===================== Second One =====================
19+
; ```
20+
; mov w6, #6
21+
; mov w7, #7
22+
; b
23+
; ```
24+
; It has:
25+
; - `SequenceSize=12` and `OccurrenceCount=4`
26+
; - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
27+
; - `NotOutlinedCost=12*4=48` and `OutliningCost=4*4+12+0=28`
28+
; - `Benefit=48-28=20` and `Priority=48/28=1.71`
29+
;
30+
; Note that the first one has higher benefit, but lower priority.
31+
; Hence, when outlining per priority, the second one will be outlined first.
32+
33+
; RUN: llc %s -enable-machine-outliner=always -filetype=obj -o %t
34+
; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-SORT-BY-PRIORITY
35+
36+
; RUN: llc %s -enable-machine-outliner=always -outliner-benefit-threshold=22 -filetype=obj -o %t
37+
; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-THRESHOLD
38+
39+
40+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
41+
target triple = "arm64-apple-macosx14.0.0"
42+
43+
declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)
44+
45+
define i32 @_Z2f1v() minsize {
46+
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 11, i32 noundef 6, i32 noundef 7)
47+
ret i32 %1
48+
}
49+
50+
define i32 @_Z2f2v() minsize {
51+
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 12, i32 noundef 6, i32 noundef 7)
52+
ret i32 %1
53+
}
54+
55+
define i32 @_Z2f3v() minsize {
56+
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 13, i32 noundef 6, i32 noundef 7)
57+
ret i32 %1
58+
}
59+
60+
define i32 @_Z2f4v() minsize {
61+
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 14, i32 noundef 6, i32 noundef 7)
62+
ret i32 %1
63+
}
64+
65+
define i32 @_Z2f5v() minsize {
66+
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 15, i32 noundef 8, i32 noundef 9)
67+
ret i32 %1
68+
}
69+
70+
define i32 @_Z2f6v() minsize {
71+
%1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 16, i32 noundef 9, i32 noundef 8)
72+
ret i32 %1
73+
}
74+
75+
; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_0>:
76+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w6, #0x6
77+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w7, #0x7
78+
; CHECK-SORT-BY-PRIORITY-NEXT: b
79+
80+
; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_1>:
81+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w0, #0x1
82+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w1, #0x2
83+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w2, #0x3
84+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w3, #0x4
85+
; CHECK-SORT-BY-PRIORITY-NEXT: mov w4, #0x5
86+
; CHECK-SORT-BY-PRIORITY-NEXT: ret
87+
88+
; CHECK-THRESHOLD: <_OUTLINED_FUNCTION_0>:
89+
; CHECK-THRESHOLD-NEXT: mov w0, #0x1
90+
; CHECK-THRESHOLD-NEXT: mov w1, #0x2
91+
; CHECK-THRESHOLD-NEXT: mov w2, #0x3
92+
; CHECK-THRESHOLD-NEXT: mov w3, #0x4
93+
; CHECK-THRESHOLD-NEXT: mov w4, #0x5
94+
; CHECK-THRESHOLD-NEXT: ret
95+
96+
; CHECK-THRESHOLD-NOT: <_OUTLINED_FUNCTION_1>:
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# The content of this test is modified from the output obtained by running
2+
# `bin/llc -O2 -stop-before=machine-outliner <path_to_llvm_project>/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll -o -`
3+
# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
4+
5+
--- |
6+
declare i32 @foo()
7+
8+
define void @f1() #0 { ret void }
9+
define void @f2() #0 { ret void }
10+
define void @f3() #0 { ret void }
11+
define void @f4() #0 { ret void }
12+
define void @f5() #0 { ret void }
13+
define void @f6() #0 { ret void }
14+
15+
attributes #0 = { minsize }
16+
...
17+
---
18+
# CHECK-LABEL: name: f1
19+
# CHECK-LABEL: bb.0:
20+
# CHECK-NEXT: liveins: $lr
21+
# CHECK-NEXT: {{ $}}
22+
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
23+
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1
24+
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
25+
# CHECK-NEXT: $w5 = MOVZWi 11, 0
26+
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0
27+
name: f1
28+
tracksRegLiveness: true
29+
frameInfo:
30+
isCalleeSavedInfoValid: true
31+
machineFunctionInfo:
32+
hasRedZone: false
33+
body: |
34+
bb.0:
35+
$w0 = MOVZWi 1, 0
36+
$w1 = MOVZWi 2, 0
37+
$w2 = MOVZWi 3, 0
38+
$w3 = MOVZWi 4, 0
39+
$w4 = MOVZWi 5, 0
40+
$w5 = MOVZWi 11, 0
41+
$w6 = MOVZWi 6, 0
42+
$w7 = MOVZWi 7, 0
43+
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7
44+
45+
...
46+
---
47+
# CHECK-LABEL: name: f2
48+
# CHECK-LABEL: bb.0:
49+
# CHECK-NEXT: liveins: $lr
50+
# CHECK-NEXT: {{ $}}
51+
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
52+
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1
53+
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
54+
# CHECK-NEXT: $w5 = MOVZWi 12, 0
55+
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0
56+
name: f2
57+
tracksRegLiveness: true
58+
frameInfo:
59+
isCalleeSavedInfoValid: true
60+
machineFunctionInfo:
61+
hasRedZone: false
62+
body: |
63+
bb.0:
64+
$w0 = MOVZWi 1, 0
65+
$w1 = MOVZWi 2, 0
66+
$w2 = MOVZWi 3, 0
67+
$w3 = MOVZWi 4, 0
68+
$w4 = MOVZWi 5, 0
69+
$w5 = MOVZWi 12, 0
70+
$w6 = MOVZWi 6, 0
71+
$w7 = MOVZWi 7, 0
72+
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7
73+
74+
...
75+
---
76+
# CHECK-LABEL: name: f3
77+
# CHECK-LABEL: bb.0:
78+
# CHECK-NEXT: liveins: $lr
79+
# CHECK-NEXT: {{ $}}
80+
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
81+
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1
82+
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
83+
# CHECK-NEXT: $w5 = MOVZWi 13, 0
84+
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0
85+
name: f3
86+
tracksRegLiveness: true
87+
frameInfo:
88+
isCalleeSavedInfoValid: true
89+
machineFunctionInfo:
90+
hasRedZone: false
91+
body: |
92+
bb.0:
93+
$w0 = MOVZWi 1, 0
94+
$w1 = MOVZWi 2, 0
95+
$w2 = MOVZWi 3, 0
96+
$w3 = MOVZWi 4, 0
97+
$w4 = MOVZWi 5, 0
98+
$w5 = MOVZWi 13, 0
99+
$w6 = MOVZWi 6, 0
100+
$w7 = MOVZWi 7, 0
101+
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7
102+
103+
...
104+
---
105+
# CHECK-LABEL: name: f4
106+
# CHECK-LABEL: bb.0:
107+
# CHECK-NEXT: liveins: $lr
108+
# CHECK-NEXT: {{ $}}
109+
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
110+
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1
111+
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
112+
# CHECK-NEXT: $w5 = MOVZWi 14, 0
113+
# CHECK-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0
114+
name: f4
115+
tracksRegLiveness: true
116+
frameInfo:
117+
isCalleeSavedInfoValid: true
118+
machineFunctionInfo:
119+
hasRedZone: false
120+
body: |
121+
bb.0:
122+
$w0 = MOVZWi 1, 0
123+
$w1 = MOVZWi 2, 0
124+
$w2 = MOVZWi 3, 0
125+
$w3 = MOVZWi 4, 0
126+
$w4 = MOVZWi 5, 0
127+
$w5 = MOVZWi 14, 0
128+
$w6 = MOVZWi 6, 0
129+
$w7 = MOVZWi 7, 0
130+
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7
131+
132+
...
133+
---
134+
# CHECK-LABEL: name: f5
135+
# CHECK-LABEL: bb.0:
136+
# CHECK-NEXT: liveins: $lr
137+
# CHECK-NEXT: {{ $}}
138+
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
139+
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1
140+
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
141+
# CHECK-NOT: @OUTLINED_FUNCTION_0
142+
name: f5
143+
tracksRegLiveness: true
144+
frameInfo:
145+
isCalleeSavedInfoValid: true
146+
machineFunctionInfo:
147+
hasRedZone: false
148+
body: |
149+
bb.0:
150+
$w0 = MOVZWi 1, 0
151+
$w1 = MOVZWi 2, 0
152+
$w2 = MOVZWi 3, 0
153+
$w3 = MOVZWi 4, 0
154+
$w4 = MOVZWi 5, 0
155+
$w5 = MOVZWi 15, 0
156+
$w6 = MOVZWi 8, 0
157+
$w7 = MOVZWi 9, 0
158+
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7
159+
160+
...
161+
---
162+
# CHECK-LABEL: name: f6
163+
# CHECK-LABEL: bb.0:
164+
# CHECK-NEXT: liveins: $lr
165+
# CHECK-NEXT: {{ $}}
166+
# CHECK-NEXT: $x5 = ORRXrs $xzr, $lr, 0
167+
# CHECK-NEXT: BL @OUTLINED_FUNCTION_1
168+
# CHECK-NEXT: $lr = ORRXrs $xzr, $x5, 0
169+
# CHECK-NOT: @OUTLINED_FUNCTION_0
170+
name: f6
171+
tracksRegLiveness: true
172+
frameInfo:
173+
isCalleeSavedInfoValid: true
174+
machineFunctionInfo:
175+
hasRedZone: false
176+
body: |
177+
bb.0:
178+
$w0 = MOVZWi 1, 0
179+
$w1 = MOVZWi 2, 0
180+
$w2 = MOVZWi 3, 0
181+
$w3 = MOVZWi 4, 0
182+
$w4 = MOVZWi 5, 0
183+
$w5 = MOVZWi 16, 0
184+
$w6 = MOVZWi 9, 0
185+
$w7 = MOVZWi 8, 0
186+
TCRETURNdi @foo, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit killed $w0, implicit killed $w1, implicit killed $w2, implicit killed $w3, implicit killed $w4, implicit killed $w5, implicit killed $w6, implicit killed $w7
187+
188+
...
189+
190+
# CHECK-LABEL: name: OUTLINED_FUNCTION_0
191+
# CHECK-LABEL: bb.0:
192+
# CHECK-NEXT: liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr, $d8, $d9, $d10, $d11, $d12, $d13, $d14, $d15, $w0, $w1, $w2, $w3, $w4, $w5
193+
# CHECK-NEXT: {{ $}}
194+
# CHECK-NEXT: $w6 = MOVZWi 6, 0
195+
# CHECK-NEXT: $w7 = MOVZWi 7, 0
196+
197+
# CHECK-LABEL: name: OUTLINED_FUNCTION_1
198+
# CHECK-LABEL: bb.0:
199+
# CHECK-NEXT: liveins: $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr, $d8, $d9, $d10, $d11, $d12, $d13, $d14, $d15
200+
# CHECK-NEXT: {{ $}}
201+
# CHECK-NEXT: $w0 = MOVZWi 1, 0
202+
# CHECK-NEXT: $w1 = MOVZWi 2, 0
203+
# CHECK-NEXT: $w2 = MOVZWi 3, 0
204+
# CHECK-NEXT: $w3 = MOVZWi 4, 0
205+
# CHECK-NEXT: $w4 = MOVZWi 5, 0
206+
# CHECK-NEXT: RET $lr

0 commit comments

Comments
 (0)