Skip to content

Commit 990c433

Browse files
committed
[PowerPC] add tests for popcount with zext; NFC
llvm-svn: 375142
1 parent e51d57d commit 990c433

File tree

1 file changed

+304
-0
lines changed

1 file changed

+304
-0
lines changed
+304
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,304 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+popcntd < %s | FileCheck %s --check-prefixes=ANY,FAST
3+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mattr=+slow-popcntd < %s | FileCheck %s --check-prefixes=ANY,SLOW
4+
5+
; zext-then-ctpop: the i8 argument is zero-extended to i16 and the population
; count is taken on the widened value. The FAST checks (RUN line with
; -mattr=+popcntd) expect a mask followed by popcntw; the SLOW checks (RUN line
; with -mattr=+slow-popcntd) expect the expanded shift/mask/multiply sequence.
define i16 @zpop_i8_i16(i8 %x) {
6+
; FAST-LABEL: zpop_i8_i16:
7+
; FAST: # %bb.0:
8+
; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
9+
; FAST-NEXT: popcntw 3, 3
10+
; FAST-NEXT: blr
11+
;
12+
; SLOW-LABEL: zpop_i8_i16:
13+
; SLOW: # %bb.0:
14+
; SLOW-NEXT: clrlwi 5, 3, 24
15+
; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
16+
; SLOW-NEXT: andi. 3, 3, 85
17+
; SLOW-NEXT: lis 4, 13107
18+
; SLOW-NEXT: subf 3, 3, 5
19+
; SLOW-NEXT: ori 4, 4, 13107
20+
; SLOW-NEXT: rotlwi 5, 3, 30
21+
; SLOW-NEXT: and 3, 3, 4
22+
; SLOW-NEXT: andis. 4, 5, 13107
23+
; SLOW-NEXT: andi. 5, 5, 13107
24+
; SLOW-NEXT: or 4, 5, 4
25+
; SLOW-NEXT: add 3, 3, 4
26+
; SLOW-NEXT: lis 5, 3855
27+
; SLOW-NEXT: srwi 4, 3, 4
28+
; SLOW-NEXT: add 3, 3, 4
29+
; SLOW-NEXT: lis 4, 257
30+
; SLOW-NEXT: ori 5, 5, 3855
31+
; SLOW-NEXT: and 3, 3, 5
32+
; SLOW-NEXT: ori 4, 4, 257
33+
; SLOW-NEXT: mullw 3, 3, 4
34+
; SLOW-NEXT: srwi 3, 3, 24
35+
; SLOW-NEXT: blr
36+
%z = zext i8 %x to i16
37+
%pop = tail call i16 @llvm.ctpop.i16(i16 %z)
38+
ret i16 %pop
39+
}
40+
41+
; ctpop-then-zext: the population count is taken on the i8 argument and the i8
; result is then zero-extended to i16. With +popcntd the checks expect an extra
; clrldi after popcntw; with +slow-popcntd the expanded sequence ends in an
; rlwinm instead of a plain srwi.
define i16 @popz_i8_i16(i8 %x) {
42+
; FAST-LABEL: popz_i8_i16:
43+
; FAST: # %bb.0:
44+
; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
45+
; FAST-NEXT: popcntw 3, 3
46+
; FAST-NEXT: clrldi 3, 3, 32
47+
; FAST-NEXT: blr
48+
;
49+
; SLOW-LABEL: popz_i8_i16:
50+
; SLOW: # %bb.0:
51+
; SLOW-NEXT: clrlwi 5, 3, 24
52+
; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
53+
; SLOW-NEXT: andi. 3, 3, 85
54+
; SLOW-NEXT: lis 4, 13107
55+
; SLOW-NEXT: subf 3, 3, 5
56+
; SLOW-NEXT: ori 4, 4, 13107
57+
; SLOW-NEXT: rotlwi 5, 3, 30
58+
; SLOW-NEXT: and 3, 3, 4
59+
; SLOW-NEXT: andis. 4, 5, 13107
60+
; SLOW-NEXT: andi. 5, 5, 13107
61+
; SLOW-NEXT: or 4, 5, 4
62+
; SLOW-NEXT: add 3, 3, 4
63+
; SLOW-NEXT: lis 5, 3855
64+
; SLOW-NEXT: srwi 4, 3, 4
65+
; SLOW-NEXT: add 3, 3, 4
66+
; SLOW-NEXT: lis 4, 257
67+
; SLOW-NEXT: ori 5, 5, 3855
68+
; SLOW-NEXT: and 3, 3, 5
69+
; SLOW-NEXT: ori 4, 4, 257
70+
; SLOW-NEXT: mullw 3, 3, 4
71+
; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
72+
; SLOW-NEXT: blr
73+
%pop = tail call i8 @llvm.ctpop.i8(i8 %x)
74+
%z = zext i8 %pop to i16
75+
ret i16 %z
76+
}
77+
78+
; zext-then-ctpop: the i8 argument is zero-extended to i32 and the population
; count is taken on the widened value. With +popcntd the checks expect a mask
; of the low 8 bits followed by popcntw; with +slow-popcntd they expect the
; expanded shift/mask/multiply sequence.
define i32 @zpop_i8_i32(i8 %x) {
79+
; FAST-LABEL: zpop_i8_i32:
80+
; FAST: # %bb.0:
81+
; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
82+
; FAST-NEXT: popcntw 3, 3
83+
; FAST-NEXT: blr
84+
;
85+
; SLOW-LABEL: zpop_i8_i32:
86+
; SLOW: # %bb.0:
87+
; SLOW-NEXT: clrlwi 5, 3, 24
88+
; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
89+
; SLOW-NEXT: andi. 3, 3, 85
90+
; SLOW-NEXT: lis 4, 13107
91+
; SLOW-NEXT: subf 3, 3, 5
92+
; SLOW-NEXT: ori 4, 4, 13107
93+
; SLOW-NEXT: rotlwi 5, 3, 30
94+
; SLOW-NEXT: and 3, 3, 4
95+
; SLOW-NEXT: andis. 4, 5, 13107
96+
; SLOW-NEXT: andi. 5, 5, 13107
97+
; SLOW-NEXT: or 4, 5, 4
98+
; SLOW-NEXT: add 3, 3, 4
99+
; SLOW-NEXT: lis 5, 3855
100+
; SLOW-NEXT: srwi 4, 3, 4
101+
; SLOW-NEXT: add 3, 3, 4
102+
; SLOW-NEXT: lis 4, 257
103+
; SLOW-NEXT: ori 5, 5, 3855
104+
; SLOW-NEXT: and 3, 3, 5
105+
; SLOW-NEXT: ori 4, 4, 257
106+
; SLOW-NEXT: mullw 3, 3, 4
107+
; SLOW-NEXT: srwi 3, 3, 24
108+
; SLOW-NEXT: blr
109+
%z = zext i8 %x to i32
110+
%pop = tail call i32 @llvm.ctpop.i32(i32 %z)
111+
ret i32 %pop
112+
}
113+
114+
; ctpop-then-zext: the population count is taken on the i8 argument and the i8
; result is then zero-extended to i32. With +popcntd the checks expect an extra
; clrldi after popcntw.
; NOTE(review): the name omits the 'i' before the destination width that other
; tests in this file carry (e.g. popz_i8_i16); a rename would also have to
; update both -LABEL check lines below.
define i32 @popz_i8_32(i8 %x) {
115+
; FAST-LABEL: popz_i8_32:
116+
; FAST: # %bb.0:
117+
; FAST-NEXT: rlwinm 3, 3, 0, 24, 31
118+
; FAST-NEXT: popcntw 3, 3
119+
; FAST-NEXT: clrldi 3, 3, 32
120+
; FAST-NEXT: blr
121+
;
122+
; SLOW-LABEL: popz_i8_32:
123+
; SLOW: # %bb.0:
124+
; SLOW-NEXT: clrlwi 5, 3, 24
125+
; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
126+
; SLOW-NEXT: andi. 3, 3, 85
127+
; SLOW-NEXT: lis 4, 13107
128+
; SLOW-NEXT: subf 3, 3, 5
129+
; SLOW-NEXT: ori 4, 4, 13107
130+
; SLOW-NEXT: rotlwi 5, 3, 30
131+
; SLOW-NEXT: and 3, 3, 4
132+
; SLOW-NEXT: andis. 4, 5, 13107
133+
; SLOW-NEXT: andi. 5, 5, 13107
134+
; SLOW-NEXT: or 4, 5, 4
135+
; SLOW-NEXT: add 3, 3, 4
136+
; SLOW-NEXT: lis 5, 3855
137+
; SLOW-NEXT: srwi 4, 3, 4
138+
; SLOW-NEXT: add 3, 3, 4
139+
; SLOW-NEXT: lis 4, 257
140+
; SLOW-NEXT: ori 5, 5, 3855
141+
; SLOW-NEXT: and 3, 3, 5
142+
; SLOW-NEXT: ori 4, 4, 257
143+
; SLOW-NEXT: mullw 3, 3, 4
144+
; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
145+
; SLOW-NEXT: blr
146+
%pop = tail call i8 @llvm.ctpop.i8(i8 %x)
147+
%z = zext i8 %pop to i32
148+
ret i32 %z
149+
}
150+
151+
; zext-then-ctpop: the i16 argument is zero-extended to i32 and the population
; count is taken on the widened value. With +popcntd the checks expect a mask
; of the low 16 bits followed by popcntw; with +slow-popcntd they expect the
; expanded shift/mask/multiply sequence.
define i32 @zpop_i16_i32(i16 %x) {
152+
; FAST-LABEL: zpop_i16_i32:
153+
; FAST: # %bb.0:
154+
; FAST-NEXT: rlwinm 3, 3, 0, 16, 31
155+
; FAST-NEXT: popcntw 3, 3
156+
; FAST-NEXT: blr
157+
;
158+
; SLOW-LABEL: zpop_i16_i32:
159+
; SLOW: # %bb.0:
160+
; SLOW-NEXT: clrlwi 5, 3, 16
161+
; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
162+
; SLOW-NEXT: andi. 3, 3, 21845
163+
; SLOW-NEXT: lis 4, 13107
164+
; SLOW-NEXT: subf 3, 3, 5
165+
; SLOW-NEXT: ori 4, 4, 13107
166+
; SLOW-NEXT: rotlwi 5, 3, 30
167+
; SLOW-NEXT: and 3, 3, 4
168+
; SLOW-NEXT: andis. 4, 5, 13107
169+
; SLOW-NEXT: andi. 5, 5, 13107
170+
; SLOW-NEXT: or 4, 5, 4
171+
; SLOW-NEXT: add 3, 3, 4
172+
; SLOW-NEXT: lis 5, 3855
173+
; SLOW-NEXT: srwi 4, 3, 4
174+
; SLOW-NEXT: add 3, 3, 4
175+
; SLOW-NEXT: lis 4, 257
176+
; SLOW-NEXT: ori 5, 5, 3855
177+
; SLOW-NEXT: and 3, 3, 5
178+
; SLOW-NEXT: ori 4, 4, 257
179+
; SLOW-NEXT: mullw 3, 3, 4
180+
; SLOW-NEXT: srwi 3, 3, 24
181+
; SLOW-NEXT: blr
182+
%z = zext i16 %x to i32
183+
%pop = tail call i32 @llvm.ctpop.i32(i32 %z)
184+
ret i32 %pop
185+
}
186+
187+
; ctpop-then-zext: the population count is taken on the i16 argument and the
; i16 result is then zero-extended to i32. With +popcntd the checks expect an
; extra clrldi after popcntw.
; NOTE(review): the name omits the 'i' before the destination width that other
; tests in this file carry (e.g. zpop_i16_i32); a rename would also have to
; update both -LABEL check lines below.
define i32 @popz_i16_32(i16 %x) {
188+
; FAST-LABEL: popz_i16_32:
189+
; FAST: # %bb.0:
190+
; FAST-NEXT: rlwinm 3, 3, 0, 16, 31
191+
; FAST-NEXT: popcntw 3, 3
192+
; FAST-NEXT: clrldi 3, 3, 32
193+
; FAST-NEXT: blr
194+
;
195+
; SLOW-LABEL: popz_i16_32:
196+
; SLOW: # %bb.0:
197+
; SLOW-NEXT: clrlwi 5, 3, 16
198+
; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31
199+
; SLOW-NEXT: andi. 3, 3, 21845
200+
; SLOW-NEXT: lis 4, 13107
201+
; SLOW-NEXT: subf 3, 3, 5
202+
; SLOW-NEXT: ori 4, 4, 13107
203+
; SLOW-NEXT: rotlwi 5, 3, 30
204+
; SLOW-NEXT: and 3, 3, 4
205+
; SLOW-NEXT: andis. 4, 5, 13107
206+
; SLOW-NEXT: andi. 5, 5, 13107
207+
; SLOW-NEXT: or 4, 5, 4
208+
; SLOW-NEXT: add 3, 3, 4
209+
; SLOW-NEXT: lis 5, 3855
210+
; SLOW-NEXT: srwi 4, 3, 4
211+
; SLOW-NEXT: add 3, 3, 4
212+
; SLOW-NEXT: lis 4, 257
213+
; SLOW-NEXT: ori 5, 5, 3855
214+
; SLOW-NEXT: and 3, 3, 5
215+
; SLOW-NEXT: ori 4, 4, 257
216+
; SLOW-NEXT: mullw 3, 3, 4
217+
; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
218+
; SLOW-NEXT: blr
219+
%pop = tail call i16 @llvm.ctpop.i16(i16 %x)
220+
%z = zext i16 %pop to i32
221+
ret i32 %z
222+
}
223+
224+
; zext-then-ctpop: the i32 argument is zero-extended to i64 and the population
; count is taken on the 64-bit value. With +popcntd the checks expect clrldi
; followed by popcntd; with +slow-popcntd they expect an expanded sequence that
; uses 64-bit operations (rldimi, rotldi, mulld, rldicl).
define i64 @zpop_i32_i64(i32 %x) {
225+
; FAST-LABEL: zpop_i32_i64:
226+
; FAST: # %bb.0:
227+
; FAST-NEXT: clrldi 3, 3, 32
228+
; FAST-NEXT: popcntd 3, 3
229+
; FAST-NEXT: blr
230+
;
231+
; SLOW-LABEL: zpop_i32_i64:
232+
; SLOW: # %bb.0:
233+
; SLOW-NEXT: rlwinm 5, 3, 31, 1, 0
234+
; SLOW-NEXT: lis 4, 13107
235+
; SLOW-NEXT: andis. 6, 5, 21845
236+
; SLOW-NEXT: andi. 5, 5, 21845
237+
; SLOW-NEXT: ori 4, 4, 13107
238+
; SLOW-NEXT: or 5, 5, 6
239+
; SLOW-NEXT: clrldi 3, 3, 32
240+
; SLOW-NEXT: rldimi 4, 4, 32, 0
241+
; SLOW-NEXT: sub 3, 3, 5
242+
; SLOW-NEXT: and 5, 3, 4
243+
; SLOW-NEXT: rotldi 3, 3, 62
244+
; SLOW-NEXT: and 3, 3, 4
245+
; SLOW-NEXT: add 3, 5, 3
246+
; SLOW-NEXT: lis 4, 3855
247+
; SLOW-NEXT: rldicl 5, 3, 60, 4
248+
; SLOW-NEXT: ori 4, 4, 3855
249+
; SLOW-NEXT: add 3, 3, 5
250+
; SLOW-NEXT: lis 5, 257
251+
; SLOW-NEXT: rldimi 4, 4, 32, 0
252+
; SLOW-NEXT: ori 5, 5, 257
253+
; SLOW-NEXT: and 3, 3, 4
254+
; SLOW-NEXT: rldimi 5, 5, 32, 0
255+
; SLOW-NEXT: mulld 3, 3, 5
256+
; SLOW-NEXT: rldicl 3, 3, 8, 56
257+
; SLOW-NEXT: blr
258+
%z = zext i32 %x to i64
259+
%pop = tail call i64 @llvm.ctpop.i64(i64 %z)
260+
ret i64 %pop
261+
}
262+
263+
; ctpop-then-zext: the population count is taken on the i32 argument and the
; i32 result is then zero-extended to i64. With +popcntd the checks expect
; popcntw followed by clrldi; with +slow-popcntd they expect an expanded
; sequence built from 32-bit operations (rotlwi, srwi, mullw, rlwinm).
define i64 @popz_i32_i64(i32 %x) {
264+
; FAST-LABEL: popz_i32_i64:
265+
; FAST: # %bb.0:
266+
; FAST-NEXT: popcntw 3, 3
267+
; FAST-NEXT: clrldi 3, 3, 32
268+
; FAST-NEXT: blr
269+
;
270+
; SLOW-LABEL: popz_i32_i64:
271+
; SLOW: # %bb.0:
272+
; SLOW-NEXT: rotlwi 5, 3, 31
273+
; SLOW-NEXT: andis. 6, 5, 21845
274+
; SLOW-NEXT: andi. 5, 5, 21845
275+
; SLOW-NEXT: or 5, 5, 6
276+
; SLOW-NEXT: lis 4, 13107
277+
; SLOW-NEXT: subf 3, 5, 3
278+
; SLOW-NEXT: ori 4, 4, 13107
279+
; SLOW-NEXT: rotlwi 5, 3, 30
280+
; SLOW-NEXT: and 3, 3, 4
281+
; SLOW-NEXT: andis. 4, 5, 13107
282+
; SLOW-NEXT: andi. 5, 5, 13107
283+
; SLOW-NEXT: or 4, 5, 4
284+
; SLOW-NEXT: add 3, 3, 4
285+
; SLOW-NEXT: lis 5, 3855
286+
; SLOW-NEXT: srwi 4, 3, 4
287+
; SLOW-NEXT: add 3, 3, 4
288+
; SLOW-NEXT: lis 4, 257
289+
; SLOW-NEXT: ori 5, 5, 3855
290+
; SLOW-NEXT: and 3, 3, 5
291+
; SLOW-NEXT: ori 4, 4, 257
292+
; SLOW-NEXT: mullw 3, 3, 4
293+
; SLOW-NEXT: rlwinm 3, 3, 8, 24, 31
294+
; SLOW-NEXT: blr
295+
%pop = tail call i32 @llvm.ctpop.i32(i32 %x)
296+
%z = zext i32 %pop to i64
297+
ret i64 %z
298+
}
299+
300+
; Declarations of the llvm.ctpop intrinsics called by the test functions above,
; one per integer width (i8, i16, i32, i64).
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
300+
declare i16 @llvm.ctpop.i16(i16) nounwind readnone
301+
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
302+
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
304+

0 commit comments

Comments
 (0)