Skip to content

Commit 882814e

Browse files
authored
[InlineCost] Correct the default branch cost for the switch statement (#85160)
Fixes #81723. The earliest commit of the related code is: 919f9e8. I tried to understand the following code with #77856 (comment). https://github.com/llvm/llvm-project/blob/5932fcc47855fdd209784f38820422d2369b84b2/llvm/lib/Analysis/InlineCost.cpp#L709-L720 I think only scenarios where there is a default branch were considered.
1 parent de9b386 commit 882814e

File tree

4 files changed

+152
-49
lines changed

4 files changed

+152
-49
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -701,21 +701,26 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
701701

702702
void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
703703
bool DefaultDestUndefined) override {
704-
if (!DefaultDestUndefined)
705-
addCost(2 * InstrCost);
706704
// If suitable for a jump table, consider the cost for the table size and
707705
// branch to destination.
708706
// Maximum valid cost increased in this function.
709707
if (JumpTableSize) {
708+
// Suppose a default branch includes one compare and one conditional
709+
// branch if it's reachable.
710+
if (!DefaultDestUndefined)
711+
addCost(2 * InstrCost);
712+
// Suppose a jump table requires one load and one jump instruction.
710713
int64_t JTCost =
711-
static_cast<int64_t>(JumpTableSize) * InstrCost + 4 * InstrCost;
714+
static_cast<int64_t>(JumpTableSize) * InstrCost + 2 * InstrCost;
712715
addCost(JTCost);
713716
return;
714717
}
715718

716719
if (NumCaseCluster <= 3) {
717720
// Suppose a comparison includes one compare and one conditional branch.
718-
addCost(NumCaseCluster * 2 * InstrCost);
721+
// We can reduce a set of instructions if the default branch is
722+
// undefined.
723+
addCost((NumCaseCluster - DefaultDestUndefined) * 2 * InstrCost);
719724
return;
720725
}
721726

@@ -1152,7 +1157,7 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
11521157
// FIXME: These constants are taken from the heuristic-based cost visitor.
11531158
// These should be removed entirely in a later revision to avoid reliance on
11541159
// heuristics in the ML inliner.
1155-
static constexpr int JTCostMultiplier = 4;
1160+
static constexpr int JTCostMultiplier = 2;
11561161
static constexpr int CaseClusterCostMultiplier = 2;
11571162
static constexpr int SwitchDefaultDestCostMultiplier = 2;
11581163
static constexpr int SwitchCostMultiplier = 2;
@@ -1235,11 +1240,10 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
12351240

12361241
void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster,
12371242
bool DefaultDestUndefined) override {
1238-
if (!DefaultDestUndefined)
1239-
increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1240-
SwitchDefaultDestCostMultiplier * InstrCost);
1241-
12421243
if (JumpTableSize) {
1244+
if (!DefaultDestUndefined)
1245+
increment(InlineCostFeatureIndex::switch_default_dest_penalty,
1246+
SwitchDefaultDestCostMultiplier * InstrCost);
12431247
int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
12441248
JTCostMultiplier * InstrCost;
12451249
increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
@@ -1248,7 +1252,8 @@ class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
12481252

12491253
if (NumCaseCluster <= 3) {
12501254
increment(InlineCostFeatureIndex::case_cluster_penalty,
1251-
NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
1255+
(NumCaseCluster - DefaultDestUndefined) *
1256+
CaseClusterCostMultiplier * InstrCost);
12521257
return;
12531258
}
12541259

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
; RUN: opt -S -passes=inline %s -debug-only=inline-cost -min-jump-table-entries=4 --disable-output 2>&1 | FileCheck %s -check-prefix=LOOKUPTABLE -match-full-lines
2+
; RUN: opt -S -passes=inline %s -debug-only=inline-cost -min-jump-table-entries=5 --disable-output 2>&1 | FileCheck %s -check-prefix=SWITCH -match-full-lines
3+
; REQUIRES: asserts
4+
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
define i64 @main(i64 %a) {
9+
%b = call i64 @small_switch_default(i64 %a)
10+
%c = call i64 @small_switch_no_default(i64 %a)
11+
%d = call i64 @lookup_table_default(i64 %a)
12+
%e = call i64 @lookup_table_no_default(i64 %a)
13+
ret i64 %b
14+
}
15+
16+
; SWITCH-LABEL: Analyzing call of small_switch_default{{.*}}
17+
; SWITCH: Cost: 0
18+
define i64 @small_switch_default(i64 %a) {
19+
switch i64 %a, label %default_branch [
20+
i64 -1, label %branch_0
21+
i64 8, label %branch_1
22+
i64 52, label %branch_2
23+
]
24+
25+
branch_0:
26+
br label %exit
27+
28+
branch_1:
29+
br label %exit
30+
31+
branch_2:
32+
br label %exit
33+
34+
default_branch:
35+
br label %exit
36+
37+
exit:
38+
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ], [ 3, %default_branch ]
39+
ret i64 %b
40+
}
41+
42+
; SWITCH-LABEL: Analyzing call of small_switch_no_default{{.*}}
43+
; SWITCH: Cost: -10
44+
define i64 @small_switch_no_default(i64 %a) {
45+
switch i64 %a, label %unreachabledefault [
46+
i64 -1, label %branch_0
47+
i64 8, label %branch_1
48+
i64 52, label %branch_2
49+
]
50+
51+
branch_0:
52+
br label %exit
53+
54+
branch_1:
55+
br label %exit
56+
57+
branch_2:
58+
br label %exit
59+
60+
unreachabledefault:
61+
unreachable
62+
63+
exit:
64+
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ]
65+
ret i64 %b
66+
}
67+
68+
; LOOKUPTABLE-LABEL: Analyzing call of lookup_table_default{{.*}}
69+
; LOOKUPTABLE: Cost: 10
70+
; SWITCH-LABEL: Analyzing call of lookup_table_default{{.*}}
71+
; SWITCH: Cost: 20
72+
define i64 @lookup_table_default(i64 %a) {
73+
switch i64 %a, label %default_branch [
74+
i64 0, label %branch_0
75+
i64 1, label %branch_1
76+
i64 2, label %branch_2
77+
i64 3, label %branch_3
78+
]
79+
80+
branch_0:
81+
br label %exit
82+
83+
branch_1:
84+
br label %exit
85+
86+
branch_2:
87+
br label %exit
88+
89+
branch_3:
90+
br label %exit
91+
92+
default_branch:
93+
br label %exit
94+
95+
exit:
96+
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ], [ 7, %branch_3 ], [ 3, %default_branch ]
97+
ret i64 %b
98+
}
99+
100+
; LOOKUPTABLE-LABEL: Analyzing call of lookup_table_no_default{{.*}}
101+
; LOOKUPTABLE: Cost: 0
102+
; SWITCH-LABEL: Analyzing call of lookup_table_no_default{{.*}}
103+
; SWITCH: Cost: 20
104+
define i64 @lookup_table_no_default(i64 %a) {
105+
switch i64 %a, label %unreachabledefault [
106+
i64 0, label %branch_0
107+
i64 1, label %branch_1
108+
i64 2, label %branch_2
109+
i64 3, label %branch_3
110+
]
111+
112+
branch_0:
113+
br label %exit
114+
115+
branch_1:
116+
br label %exit
117+
118+
branch_2:
119+
br label %exit
120+
121+
branch_3:
122+
br label %exit
123+
124+
unreachabledefault:
125+
unreachable
126+
127+
exit:
128+
%b = phi i64 [ 5, %branch_0 ], [ 9, %branch_1 ], [ 2, %branch_2 ], [ 7, %branch_3 ]
129+
ret i64 %b
130+
}

llvm/test/Transforms/Inline/inline-switch-default-2.ll

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt %s -S -passes=inline -inline-threshold=21 | FileCheck %s
2+
; RUN: opt %s -S -passes=inline -inline-threshold=11 | FileCheck %s
33

44
; Check for scenarios without TTI.
55

@@ -16,24 +16,7 @@ define i64 @foo1(i64 %a) {
1616
define i64 @foo2(i64 %a) {
1717
; CHECK-LABEL: define i64 @foo2(
1818
; CHECK-SAME: i64 [[A:%.*]]) {
19-
; CHECK-NEXT: switch i64 [[A]], label [[UNREACHABLEDEFAULT_I:%.*]] [
20-
; CHECK-NEXT: i64 0, label [[BRANCH_0_I:%.*]]
21-
; CHECK-NEXT: i64 2, label [[BRANCH_2_I:%.*]]
22-
; CHECK-NEXT: i64 4, label [[BRANCH_4_I:%.*]]
23-
; CHECK-NEXT: i64 6, label [[BRANCH_6_I:%.*]]
24-
; CHECK-NEXT: ]
25-
; CHECK: branch_0.i:
26-
; CHECK-NEXT: br label [[BAR2_EXIT:%.*]]
27-
; CHECK: branch_2.i:
28-
; CHECK-NEXT: br label [[BAR2_EXIT]]
29-
; CHECK: branch_4.i:
30-
; CHECK-NEXT: br label [[BAR2_EXIT]]
31-
; CHECK: branch_6.i:
32-
; CHECK-NEXT: br label [[BAR2_EXIT]]
33-
; CHECK: unreachabledefault.i:
34-
; CHECK-NEXT: unreachable
35-
; CHECK: bar2.exit:
36-
; CHECK-NEXT: [[B_I:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ]
19+
; CHECK-NEXT: [[B_I:%.*]] = call i64 @bar2(i64 [[A]])
3720
; CHECK-NEXT: ret i64 [[B_I]]
3821
;
3922
%b = call i64 @bar2(i64 %a)

llvm/test/Transforms/Inline/inline-switch-default.ll

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2-
; RUN: opt %s -S -passes=inline -inline-threshold=26 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE
3-
; RUN: opt %s -S -passes=inline -inline-threshold=21 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH
2+
; RUN: opt %s -S -passes=inline -inline-threshold=16 -min-jump-table-entries=4 | FileCheck %s -check-prefix=LOOKUPTABLE
3+
; RUN: opt %s -S -passes=inline -inline-threshold=11 -min-jump-table-entries=5 | FileCheck %s -check-prefix=SWITCH
44

55
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
66
target triple = "x86_64-unknown-linux-gnu"
@@ -22,6 +22,8 @@ define i64 @foo1(i64 %a) {
2222
ret i64 %b
2323
}
2424

25+
; Since the default branch is undefined behavior,
26+
; we can inline `bar2`: https://github.com/llvm/llvm-project/issues/90929
2527
define i64 @foo2(i64 %a) {
2628
; LOOKUPTABLE-LABEL: define i64 @foo2(
2729
; LOOKUPTABLE-SAME: i64 [[A:%.*]]) {
@@ -47,24 +49,7 @@ define i64 @foo2(i64 %a) {
4749
;
4850
; SWITCH-LABEL: define i64 @foo2(
4951
; SWITCH-SAME: i64 [[A:%.*]]) {
50-
; SWITCH-NEXT: switch i64 [[A]], label [[UNREACHABLEDEFAULT_I:%.*]] [
51-
; SWITCH-NEXT: i64 0, label [[BRANCH_0_I:%.*]]
52-
; SWITCH-NEXT: i64 2, label [[BRANCH_2_I:%.*]]
53-
; SWITCH-NEXT: i64 4, label [[BRANCH_4_I:%.*]]
54-
; SWITCH-NEXT: i64 6, label [[BRANCH_6_I:%.*]]
55-
; SWITCH-NEXT: ]
56-
; SWITCH: branch_0.i:
57-
; SWITCH-NEXT: br label [[BAR2_EXIT:%.*]]
58-
; SWITCH: branch_2.i:
59-
; SWITCH-NEXT: br label [[BAR2_EXIT]]
60-
; SWITCH: branch_4.i:
61-
; SWITCH-NEXT: br label [[BAR2_EXIT]]
62-
; SWITCH: branch_6.i:
63-
; SWITCH-NEXT: br label [[BAR2_EXIT]]
64-
; SWITCH: unreachabledefault.i:
65-
; SWITCH-NEXT: unreachable
66-
; SWITCH: bar2.exit:
67-
; SWITCH-NEXT: [[B_I:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ]
52+
; SWITCH-NEXT: [[B_I:%.*]] = call i64 @bar2(i64 [[A]])
6853
; SWITCH-NEXT: ret i64 [[B_I]]
6954
;
7055
%b = call i64 @bar2(i64 %a)

0 commit comments

Comments
 (0)