
Commit faa385a

lukel97 authored and preames committed
[RISCV] Add tests for length changing shuffles
Tests taken from Luke's 88147 with minimal changes by me (preames). The main case of interest here is when the mask length is less than the source length (i.e. the length is decreasing). We often scalarize these, which on RISC-V can be quite painful.
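For reference, a "length changing" shuffle is a shufflevector whose result has a different element count than its sources. A minimal length-decreasing example (illustrative only; @decreasing is a hypothetical function, not part of this patch):

define <4 x i32> @decreasing(<8 x i32> %v) {
  ; The <4 x i32> mask selects only 4 of the 8 source elements.
  %r = shufflevector <8 x i32> %v, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %r
}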
1 parent b57b3f6 commit faa385a

1 file changed: 310 additions & 0 deletions
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck -check-prefixes=CHECK,RV64 %s
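
; Length-decreasing i1 shuffle (16 -> 8): the mask is moved to a scalar
; register, each selected bit is extracted with an slli/srli pair, and the
; result is rebuilt with vslide1down before being compared back into a mask.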
define <8 x i1> @v8i1_v16i1(<16 x i1>) {
; RV32-LABEL: v8i1_v16i1:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vmv.x.s a0, v0
; RV32-NEXT: slli a1, a0, 19
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: slli a2, a0, 26
; RV32-NEXT: srli a2, a2, 31
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 24
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 29
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: slli a1, a0, 18
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: slli a2, a0, 16
; RV32-NEXT: srli a2, a2, 31
; RV32-NEXT: vmv.v.x v9, a2
; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: slli a1, a0, 27
; RV32-NEXT: srli a1, a1, 31
; RV32-NEXT: vslide1down.vx v9, v9, a1
; RV32-NEXT: slli a0, a0, 28
; RV32-NEXT: srli a0, a0, 31
; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vslide1down.vx v9, v9, a0
; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV32-NEXT: vand.vi v8, v9, 1
; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: ret
;
; RV64-LABEL: v8i1_v16i1:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vmv.x.s a0, v0
; RV64-NEXT: slli a1, a0, 51
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: slli a2, a0, 58
; RV64-NEXT: srli a2, a2, 63
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 56
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 61
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: slli a1, a0, 50
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: slli a2, a0, 48
; RV64-NEXT: srli a2, a2, 63
; RV64-NEXT: vmv.v.x v9, a2
; RV64-NEXT: vslide1down.vx v9, v9, a1
; RV64-NEXT: slli a1, a0, 59
; RV64-NEXT: srli a1, a1, 63
; RV64-NEXT: vslide1down.vx v9, v9, a1
; RV64-NEXT: slli a0, a0, 60
; RV64-NEXT: srli a0, a0, 63
; RV64-NEXT: vmv.v.i v0, 15
; RV64-NEXT: vslide1down.vx v9, v9, a0
; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t
; RV64-NEXT: vand.vi v8, v9, 1
; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: ret
  %2 = shufflevector <16 x i1> %0, <16 x i1> poison, <8 x i32> <i32 5, i32 12, i32 7, i32 2, i32 15, i32 13, i32 4, i32 3>
  ret <8 x i1> %2
}
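
; Length-decreasing i32 shuffle (8 -> 4): a vrgather over the low half, with
; the high half slid down and merged in under a mask.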
define <4 x i32> @v4i32_v8i32(<8 x i32>) {
; CHECK-LABEL: v4i32_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v10
; CHECK-NEXT: vsrl.vi v10, v10, 1
; CHECK-NEXT: vrsub.vi v11, v10, 3
; CHECK-NEXT: vrgather.vv v10, v8, v11
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 4
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> <i32 5, i32 3, i32 7, i32 2>
  ret <4 x i32> %2
}
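
; Length-decreasing i32 shuffle (16 -> 4): vrgatherei16 gathers from each
; half of the source, merged under a mask.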
define <4 x i32> @v4i32_v16i32(<16 x i32>) {
; RV32-LABEL: v4i32_v16i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vmv.v.i v12, 1
; RV32-NEXT: vmv.v.i v14, 6
; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV32-NEXT: vslideup.vi v14, v12, 1
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vid.v v12
; RV32-NEXT: vadd.vv v12, v12, v12
; RV32-NEXT: vadd.vi v15, v12, 1
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v8, v15
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV32-NEXT: vmv.v.i v0, 10
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 8
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vrgatherei16.vv v12, v8, v14, v0.t
; RV32-NEXT: vmv1r.v v8, v12
; RV32-NEXT: ret
;
; RV64-LABEL: v4i32_v16i32:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT: vid.v v12
; RV64-NEXT: vadd.vv v12, v12, v12
; RV64-NEXT: vadd.vi v14, v12, 1
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vrgatherei16.vv v12, v8, v14
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
; RV64-NEXT: vmv.v.i v0, 10
; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 8
; RV64-NEXT: li a0, 3
; RV64-NEXT: slli a0, a0, 33
; RV64-NEXT: addi a0, a0, 1
; RV64-NEXT: slli a0, a0, 16
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vmv.v.x v10, a0
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64-NEXT: vrgatherei16.vv v12, v8, v10, v0.t
; RV64-NEXT: vmv1r.v v8, v12
; RV64-NEXT: ret
  %2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 1, i32 9, i32 5, i32 14>
  ret <4 x i32> %2
}
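
; Length-decreasing i32 shuffle (32 -> 4): currently lowered by spilling the
; whole source vector to the stack and picking elements back out with scalar
; loads - the painful scalarization mentioned in the commit message.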
define <4 x i32> @v4i32_v32i32(<32 x i32>) {
; RV32-LABEL: v4i32_v32i32:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -256
; RV32-NEXT: .cfi_def_cfa_offset 256
; RV32-NEXT: sw ra, 252(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 248(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: addi s0, sp, 256
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: andi sp, sp, -128
; RV32-NEXT: li a0, 32
; RV32-NEXT: mv a1, sp
; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV32-NEXT: vse32.v v8, (a1)
; RV32-NEXT: lw a0, 36(sp)
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: vmv.x.s a1, v10
; RV32-NEXT: vmv.v.x v10, a1
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: lw a0, 120(sp)
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v8, v8, 4
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vslide1down.vx v8, v10, a1
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: addi sp, s0, -256
; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 248(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 256
; RV32-NEXT: ret
;
; RV64-LABEL: v4i32_v32i32:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -256
; RV64-NEXT: .cfi_def_cfa_offset 256
; RV64-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: addi s0, sp, 256
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: andi sp, sp, -128
; RV64-NEXT: li a0, 32
; RV64-NEXT: mv a1, sp
; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; RV64-NEXT: vse32.v v8, (a1)
; RV64-NEXT: lw a0, 36(sp)
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vslidedown.vi v10, v8, 1
; RV64-NEXT: vmv.x.s a1, v10
; RV64-NEXT: vmv.v.x v10, a1
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: lw a0, 120(sp)
; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 4
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT: vslide1down.vx v8, v10, a1
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: addi sp, s0, -256
; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 256
; RV64-NEXT: ret
  %2 = shufflevector <32 x i32> %0, <32 x i32> poison, <4 x i32> <i32 1, i32 9, i32 4, i32 30>
  ret <4 x i32> %2
}
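
; Length-increasing i1 shuffle (8 -> 16): the mask is expanded to bytes,
; gathered through a constant-pool index vector, and compared back into a mask.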
define <16 x i1> @v16i1_v8i1(<8 x i1>) {
; CHECK-LABEL: v16i1_v8i1:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: vrgather.vv v10, v9, v8
; CHECK-NEXT: vmsne.vi v0, v10, 0
; CHECK-NEXT: ret
  %2 = shufflevector <8 x i1> %0, <8 x i1> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 5, i32 1, i32 2, i32 0, i32 6, i32 2, i32 3, i32 0, i32 7, i32 1, i32 2, i32 0, i32 4>
  ret <16 x i1> %2
}
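
; Length-increasing i32 shuffle (4 -> 8): a single vrgatherei16 with indices
; loaded from the constant pool.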
define <8 x i32> @v8i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v8i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vrgatherei16.vv v10, v8, v12
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <8 x i32> %2
}
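
; Length-increasing i32 shuffle (4 -> 16): the index vector is materialized
; inline with masked vmerge.vim ops, sign-extended to e16, then used by
; vrgatherei16.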
define <16 x i32> @v16i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v16i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, 2
; CHECK-NEXT: addi a1, a0, 265
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vmerge.vim v10, v9, 2, v0
; CHECK-NEXT: lui a1, 4
; CHECK-NEXT: addi a1, a1, 548
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: addi a0, a0, -1856
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v9, v10, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vsext.vf2 v16, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vrgatherei16.vv v12, v8, v16
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <16 x i32> %2
}
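
; Length-increasing i32 shuffle (4 -> 32): same index-materialization approach
; as v16i32_v4i32, with the gather done at LMUL=8.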
define <32 x i32> @v32i32_v4i32(<4 x i32>) {
; CHECK-LABEL: v32i32_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: lui a1, 135432
; CHECK-NEXT: addi a1, a1, 1161
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vmv.v.i v10, 3
; CHECK-NEXT: vmerge.vim v10, v10, 2, v0
; CHECK-NEXT: lui a0, 270865
; CHECK-NEXT: addi a0, a0, 548
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: lui a0, 100550
; CHECK-NEXT: addi a0, a0, 64
; CHECK-NEXT: vmv.s.x v9, a0
; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; CHECK-NEXT: vmerge.vim v10, v10, 0, v0
; CHECK-NEXT: vmv1r.v v0, v9
; CHECK-NEXT: vmerge.vim v10, v10, 1, v0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; CHECK-NEXT: vsext.vf2 v24, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; CHECK-NEXT: vrgatherei16.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <32 x i32> <i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 0, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3, i32 1, i32 2, i32 0, i32 3, i32 1, i32 1, i32 2, i32 0, i32 3>
  ret <32 x i32> %2
}
