Skip to content

Commit 090850f

Browse files
authored
[AArch64][CostModel] Add NFC tests for extractelement cost (#108941)
A subsequent patch aims to reduce the extractelement cost when the only users of the extract are fmul instructions.
1 parent b9bf831 commit 090850f

File tree

1 file changed

+205
-0
lines changed

1 file changed

+205
-0
lines changed
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-unknown-linux \
3+
; RUN: -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16
4+
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64-unknown-linux \
5+
; RUN: -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FULLFP16
6+
7+
; res = lane 0 * lane 1
8+
define double @extract_case1(<2 x double> %a) { ; multiplies lane 0 by lane 1 of %a
9+
; CHECK-LABEL: 'extract_case1'
10+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
11+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x double> %a, i32 1
12+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1
13+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
14+
entry:
15+
%1 = extractelement <2 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
16+
%2 = extractelement <2 x double> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
17+
%res = fmul double %1, %2 ; the fmul is the only user of both extracts
18+
ret double %res
19+
}
20+
21+
; res = lane 1 * lane 1
22+
define double @extract_case2(<2 x double> %a) { ; squares lane 1 of %a
23+
; CHECK-LABEL: 'extract_case2'
24+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %0 = extractelement <2 x double> %a, i32 1
25+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %0
26+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
27+
entry:
28+
%1 = extractelement <2 x double> %a, i32 1 ; lane 1; appears as %0 in the expected output (cost 2)
29+
%res = fmul double %1, %1 ; the same extract feeds both fmul operands
30+
ret double %res
31+
}
32+
33+
; res = lane 0 * lane 0
34+
define double @extract_case3(<2 x double> %a) { ; squares lane 0 of %a
35+
; CHECK-LABEL: 'extract_case3'
36+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
37+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %0
38+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
39+
entry:
40+
%1 = extractelement <2 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
41+
%res = fmul double %1, %1 ; the same extract feeds both fmul operands
42+
ret double %res
43+
}
44+
45+
; res = lane 0 * scalar
46+
define double @extract_case4(<2 x double> %a, double %b) { ; multiplies lane 0 of %a by the scalar argument %b
47+
; CHECK-LABEL: 'extract_case4'
48+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
49+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %b
50+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
51+
entry:
52+
%1 = extractelement <2 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
53+
%res = fmul double %1, %b ; one operand is an extract, the other a plain scalar
54+
ret double %res
55+
}
56+
57+
; res = lane 1 * scalar
58+
define double @extract_case5(<2 x double> %a, double %b) { ; multiplies lane 1 of %a by the scalar argument %b
59+
; CHECK-LABEL: 'extract_case5'
60+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %0 = extractelement <2 x double> %a, i32 1
61+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %b
62+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
63+
entry:
64+
%1 = extractelement <2 x double> %a, i32 1 ; lane 1; appears as %0 in the expected output (cost 2)
65+
%res = fmul double %1, %b ; one operand is an extract, the other a plain scalar
66+
ret double %res
67+
}
68+
69+
; Input vector = <3 x double> (i.e. odd length vector)
70+
; res = lane 0 * lane 1
71+
define double @extract_case6(<3 x double> %a) { ; multiplies lane 0 by lane 1 of a 3-element vector
72+
; CHECK-LABEL: 'extract_case6'
73+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <3 x double> %a, i32 0
74+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <3 x double> %a, i32 1
75+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1
76+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
77+
entry:
78+
%1 = extractelement <3 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
79+
%2 = extractelement <3 x double> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
80+
%res = fmul double %1, %2 ; the fmul is the only user of both extracts
81+
ret double %res
82+
}
83+
84+
; res = lane 1 * lane 2
85+
; Extracting lane 2 is equivalent to extracting lane 0 of the other 128-bit
86+
; register. For other register sizes, this is not the case.
87+
define double @extract_case7(<4 x double> %a) { ; multiplies lane 1 by lane 2 of a 4-element vector
88+
; CHECK-LABEL: 'extract_case7'
89+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %0 = extractelement <4 x double> %a, i32 1
90+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %1 = extractelement <4 x double> %a, i32 2
91+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul double %0, %1
92+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
93+
entry:
94+
%1 = extractelement <4 x double> %a, i32 1 ; lane 1; appears as %0 in the expected output (cost 2)
95+
%2 = extractelement <4 x double> %a, i32 2 ; lane 2; appears as %1 in the expected output (cost 0)
96+
%res = fmul double %1, %2 ; the fmul is the only user of both extracts
97+
ret double %res
98+
}
99+
100+
; res = lane 0 * lane 1
101+
; The lane 1 extract has an additional user: an insertelement (feeding an fmul reduction).
102+
define double @extract_case8(<2 x double> %a) { ; lane 0 * lane 1, where the lane 1 extract also feeds an insertelement
103+
; CHECK-LABEL: 'extract_case8'
104+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
105+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x double> %a, i32 1
106+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %2 = insertelement <2 x double> %a, double %1, i32 0
107+
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %3 = call double @llvm.vector.reduce.fmul.v2f64(double 0.000000e+00, <2 x double> %2)
108+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = fmul double %0, %1
109+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = fmul double %3, %4
110+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %5
111+
entry:
112+
%1 = extractelement <2 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
113+
%2 = extractelement <2 x double> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
114+
%3 = insertelement <2 x double> %a, double %2, i32 0 ; second, non-fmul user of the lane 1 extract: writes it into lane 0 of %a
115+
%4 = call double @llvm.vector.reduce.fmul.v2f64(double 0.0, <2 x double> %3) ; NOTE(review): start value is 0.0 rather than 1.0 - presumably irrelevant because only costs are checked; confirm
116+
%5 = fmul double %1, %2 ; fmul user of both extracts
117+
%6 = fmul double %4, %5 ; combines the reduction result with the lane product
118+
ret double %6
119+
}
120+
121+
; res = lane 0 * lane 1
122+
; The lane 1 extract has an additional user: an insertelement (feeding an fadd reduction).
123+
define double @extract_case9(<2 x double> %a) { ; lane 0 * lane 1, where the lane 1 extract also feeds an insertelement
124+
; CHECK-LABEL: 'extract_case9'
125+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
126+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x double> %a, i32 1
127+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %2 = insertelement <2 x double> %a, double %1, i32 0
128+
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %3 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> %2)
129+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %4 = fmul double %0, %1
130+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = fmul double %3, %4
131+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %5
132+
entry:
133+
%1 = extractelement <2 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
134+
%2 = extractelement <2 x double> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
135+
%3 = insertelement <2 x double> %a, double %2, i32 0 ; second, non-fmul user of the lane 1 extract: writes it into lane 0 of %a
136+
%4 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> %3) ; fadd reduction of the modified vector, start value 0.0
137+
%5 = fmul double %1, %2 ; fmul user of both extracts
138+
%6 = fmul double %4, %5 ; combines the reduction result with the lane product
139+
ret double %6
140+
}
141+
142+
; res = lane 0 * lane 1
143+
; The lane 1 extract is also passed as a call argument.
144+
define double @extract_case10(<4 x double> %a) { ; lane 0 * lane 1, where the lane 1 extract is also passed to @foo
145+
; CHECK-LABEL: 'extract_case10'
146+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <4 x double> %a, i32 0
147+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <4 x double> %a, i32 1
148+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @foo(double %1)
149+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = fmul double %0, %1
150+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %2
151+
entry:
152+
%1 = extractelement <4 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
153+
%2 = extractelement <4 x double> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
154+
call void @foo(double %2) ; second, non-fmul user of the lane 1 extract
155+
%3 = fmul double %1, %2 ; fmul user of both extracts; appears as %2 in the expected output
156+
ret double %3
157+
}
158+
159+
; res = lane 0 * lane 1
160+
define half @extract_case11(<2 x half> %a) { ; half-precision variant: lane 0 * lane 1; expectations use the prefix shared by both RUN configurations
161+
; CHECK-LABEL: 'extract_case11'
162+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x half> %a, i32 0
163+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x half> %a, i32 1
164+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul half %0, %1
165+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret half %res
166+
entry:
167+
%1 = extractelement <2 x half> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
168+
%2 = extractelement <2 x half> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
169+
%res = fmul half %1, %2 ; the fmul is the only user of both extracts
170+
ret half %res
171+
}
172+
173+
; res = lane 0 * lane 1
174+
define float @extract_case12(<2 x float> %a) { ; single-precision variant: lane 0 * lane 1
175+
; CHECK-LABEL: 'extract_case12'
176+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x float> %a, i32 0
177+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x float> %a, i32 1
178+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = fmul float %0, %1
179+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %res
180+
entry:
181+
%1 = extractelement <2 x float> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
182+
%2 = extractelement <2 x float> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
183+
%res = fmul float %1, %2 ; the fmul is the only user of both extracts
184+
ret float %res
185+
}
186+
187+
; res = lane 0 + lane 1
188+
; Uses a binary operator other than fmul (fadd).
189+
define double @extract_case13(<2 x double> %a) { ; adds lane 0 and lane 1 of %a; the user of the extracts is fadd, not fmul
190+
; CHECK-LABEL: 'extract_case13'
191+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %0 = extractelement <2 x double> %a, i32 0
192+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = extractelement <2 x double> %a, i32 1
193+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = fadd double %0, %1
194+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %res
195+
entry:
196+
%1 = extractelement <2 x double> %a, i32 0 ; lane 0; appears as %0 in the expected output (cost 0)
197+
%2 = extractelement <2 x double> %a, i32 1 ; lane 1; appears as %1 in the expected output (cost 2)
198+
%res = fadd double %1, %2 ; the fadd is the only user of both extracts
199+
ret double %res
200+
}
201+
202+
declare void @foo(double) ; body-less external function; called by extract_case10 so the lane 1 extract has a non-fmul user
203+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
204+
; FULLFP16: {{.*}}
205+
; NOFP16: {{.*}}

0 commit comments

Comments
 (0)