Skip to content

Commit 213edc3

Browse files
committed
[X86][SSE] Add shuffled load tests from PR16739
llvm-svn: 369112
1 parent 3ccee5f commit 213edc3

File tree

1 file changed

+212
-0
lines changed

1 file changed

+212
-0
lines changed

llvm/test/CodeGen/X86/load-partial.ll

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,45 @@ define <4 x float> @load_float4_float3(<4 x float>* nocapture readonly dereferen
3131
ret <4 x float> %r2
3232
}
3333

34+
; Builds a <4 x float> from three separate scalar float loads of elements 0, 1
; and 2 of the pointed-to vector and returns it. The CHECK lines below give the
; expected assembly for the SSE2, SSSE3, SSE41 and AVX check prefixes.
; NOTE(review): the _0122 suffix presumably means result lanes <0,1,2,2>, but
; %r3 is built from %r1 rather than %r2, so %r2 is unused and lane 2 of the
; returned vector stays undef - confirm whether %r2 was intended. Changing the
; IR may require updating the CHECK lines.
define <4 x float> @load_float4_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
35+
; SSE2-LABEL: load_float4_float3_0122:
36+
; SSE2: # %bb.0:
37+
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
38+
; SSE2-NEXT: movups (%rdi), %xmm0
39+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
40+
; SSE2-NEXT: retq
41+
;
42+
; SSSE3-LABEL: load_float4_float3_0122:
43+
; SSSE3: # %bb.0:
44+
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
45+
; SSSE3-NEXT: movups (%rdi), %xmm0
46+
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
47+
; SSSE3-NEXT: retq
48+
;
49+
; SSE41-LABEL: load_float4_float3_0122:
50+
; SSE41: # %bb.0:
51+
; SSE41-NEXT: movups (%rdi), %xmm0
52+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
53+
; SSE41-NEXT: retq
54+
;
55+
; AVX-LABEL: load_float4_float3_0122:
56+
; AVX: # %bb.0:
57+
; AVX-NEXT: vmovups (%rdi), %xmm0
58+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
59+
; AVX-NEXT: retq
60+
%p0 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 0
61+
%p1 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 1
62+
%p2 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 2
63+
%ld0 = load float, float* %p0, align 4
64+
%ld1 = load float, float* %p1, align 4
65+
%ld2 = load float, float* %p2, align 4
66+
%r0 = insertelement <4 x float> undef, float %ld0, i32 0
67+
%r1 = insertelement <4 x float> %r0, float %ld1, i32 1
68+
%r2 = insertelement <4 x float> %r1, float %ld2, i32 2
69+
; Inserts %ld2 into lane 3 of %r1 (not %r2); see the review note above the define.
%r3 = insertelement <4 x float> %r1, float %ld2, i32 3
70+
ret <4 x float> %r3
71+
}
72+
3473
define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferenceable(16)) {
3574
; SSE-LABEL: load_float8_float3:
3675
; SSE: # %bb.0:
@@ -53,6 +92,45 @@ define <8 x float> @load_float8_float3(<4 x float>* nocapture readonly dereferen
5392
ret <8 x float> %r2
5493
}
5594

95+
; Builds an <8 x float> from three separate scalar float loads of elements 0, 1
; and 2 of the pointed-to <4 x float> and returns it; lanes 4-7 are never
; written and remain undef. The CHECK lines below give the expected assembly
; for the SSE2, SSSE3, SSE41 and AVX check prefixes.
; NOTE(review): the _0122 suffix presumably means low lanes <0,1,2,2>, but %r3
; is built from %r1 rather than %r2, so %r2 is unused and lane 2 of the
; returned vector stays undef - confirm whether %r2 was intended. Changing the
; IR may require updating the CHECK lines.
define <8 x float> @load_float8_float3_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
96+
; SSE2-LABEL: load_float8_float3_0122:
97+
; SSE2: # %bb.0:
98+
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
99+
; SSE2-NEXT: movups (%rdi), %xmm0
100+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
101+
; SSE2-NEXT: retq
102+
;
103+
; SSSE3-LABEL: load_float8_float3_0122:
104+
; SSSE3: # %bb.0:
105+
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
106+
; SSSE3-NEXT: movups (%rdi), %xmm0
107+
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
108+
; SSSE3-NEXT: retq
109+
;
110+
; SSE41-LABEL: load_float8_float3_0122:
111+
; SSE41: # %bb.0:
112+
; SSE41-NEXT: movups (%rdi), %xmm0
113+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
114+
; SSE41-NEXT: retq
115+
;
116+
; AVX-LABEL: load_float8_float3_0122:
117+
; AVX: # %bb.0:
118+
; AVX-NEXT: vmovups (%rdi), %xmm0
119+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
120+
; AVX-NEXT: retq
121+
%p0 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 0
122+
%p1 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 1
123+
%p2 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 2
124+
%ld0 = load float, float* %p0, align 4
125+
%ld1 = load float, float* %p1, align 4
126+
%ld2 = load float, float* %p2, align 4
127+
%r0 = insertelement <8 x float> undef, float %ld0, i32 0
128+
%r1 = insertelement <8 x float> %r0, float %ld1, i32 1
129+
%r2 = insertelement <8 x float> %r1, float %ld2, i32 2
130+
; Inserts %ld2 into lane 3 of %r1 (not %r2); see the review note above the define.
%r3 = insertelement <8 x float> %r1, float %ld2, i32 3
131+
ret <8 x float> %r3
132+
}
133+
56134
define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture readonly dereferenceable(16)) {
57135
; SSE-LABEL: load_float4_float3_as_float2_float:
58136
; SSE: # %bb.0:
@@ -75,6 +153,47 @@ define <4 x float> @load_float4_float3_as_float2_float(<4 x float>* nocapture re
75153
ret <4 x float> %10
76154
}
77155

156+
; Builds a <4 x float> from one <2 x float> load at the base address (lanes 0
; and 1) plus one scalar float load of element 2, and returns it. The CHECK
; lines below give the expected assembly for the SSE2, SSSE3, SSE41 and AVX
; check prefixes.
; NOTE(review): the _0122 suffix presumably means result lanes <0,1,2,2>, but
; %11 is built from %7 rather than %10, so %10 is unused and lane 2 of the
; returned vector stays undef - confirm whether %10 was intended. Changing the
; IR may require updating the CHECK lines.
define <4 x float> @load_float4_float3_as_float2_float_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
157+
; SSE2-LABEL: load_float4_float3_as_float2_float_0122:
158+
; SSE2: # %bb.0:
159+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
160+
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
161+
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
162+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
163+
; SSE2-NEXT: retq
164+
;
165+
; SSSE3-LABEL: load_float4_float3_as_float2_float_0122:
166+
; SSSE3: # %bb.0:
167+
; SSSE3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
168+
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
169+
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
170+
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
171+
; SSSE3-NEXT: retq
172+
;
173+
; SSE41-LABEL: load_float4_float3_as_float2_float_0122:
174+
; SSE41: # %bb.0:
175+
; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
176+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
177+
; SSE41-NEXT: retq
178+
;
179+
; AVX-LABEL: load_float4_float3_as_float2_float_0122:
180+
; AVX: # %bb.0:
181+
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
182+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
183+
; AVX-NEXT: retq
184+
%2 = bitcast <4 x float>* %0 to <2 x float>*
185+
%3 = load <2 x float>, <2 x float>* %2, align 4
186+
%4 = extractelement <2 x float> %3, i32 0
187+
%5 = insertelement <4 x float> undef, float %4, i32 0
188+
%6 = extractelement <2 x float> %3, i32 1
189+
%7 = insertelement <4 x float> %5, float %6, i32 1
190+
%8 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 2
191+
%9 = load float, float* %8, align 4
192+
%10 = insertelement <4 x float> %7, float %9, i32 2
193+
; Inserts %9 into lane 3 of %7 (not %10); see the review note above the define.
%11 = insertelement <4 x float> %7, float %9, i32 3
194+
ret <4 x float> %11
195+
}
196+
78197
define <4 x float> @load_float4_float3_trunc(<4 x float>* nocapture readonly dereferenceable(16)) {
79198
; SSE-LABEL: load_float4_float3_trunc:
80199
; SSE: # %bb.0:
@@ -103,6 +222,99 @@ define <4 x float> @load_float4_float3_trunc(<4 x float>* nocapture readonly der
103222
ret <4 x float> %16
104223
}
105224

225+
; Builds a <4 x float> from two i64 loads: the first (at the base address,
; align 16) is split with trunc / lshr 32 into lanes 0 and 1, and the low 32
; bits of the second (at element 2, align 8) supply the third float. The CHECK
; lines below give the expected assembly for the SSE2, SSSE3, SSE41 and AVX
; check prefixes.
; NOTE(review): the _0122 suffix presumably means result lanes <0,1,2,2>, but
; %17 is built from %13 rather than %16, so %16 is unused and lane 2 of the
; returned vector stays undef - confirm whether %16 was intended. Changing the
; IR may require updating the CHECK lines.
define <4 x float> @load_float4_float3_trunc_0122(<4 x float>* nocapture readonly dereferenceable(16)) {
226+
; SSE2-LABEL: load_float4_float3_trunc_0122:
227+
; SSE2: # %bb.0:
228+
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
229+
; SSE2-NEXT: movaps (%rdi), %xmm0
230+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
231+
; SSE2-NEXT: retq
232+
;
233+
; SSSE3-LABEL: load_float4_float3_trunc_0122:
234+
; SSSE3: # %bb.0:
235+
; SSSE3-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
236+
; SSSE3-NEXT: movaps (%rdi), %xmm0
237+
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
238+
; SSSE3-NEXT: retq
239+
;
240+
; SSE41-LABEL: load_float4_float3_trunc_0122:
241+
; SSE41: # %bb.0:
242+
; SSE41-NEXT: movaps (%rdi), %xmm0
243+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
244+
; SSE41-NEXT: retq
245+
;
246+
; AVX-LABEL: load_float4_float3_trunc_0122:
247+
; AVX: # %bb.0:
248+
; AVX-NEXT: vmovaps (%rdi), %xmm0
249+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
250+
; AVX-NEXT: retq
251+
%2 = bitcast <4 x float>* %0 to i64*
252+
%3 = load i64, i64* %2, align 16
253+
%4 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 2
254+
%5 = bitcast float* %4 to i64*
255+
%6 = load i64, i64* %5, align 8
256+
%7 = trunc i64 %3 to i32
257+
%8 = bitcast i32 %7 to float
258+
%9 = insertelement <4 x float> undef, float %8, i32 0
259+
%10 = lshr i64 %3, 32
260+
%11 = trunc i64 %10 to i32
261+
%12 = bitcast i32 %11 to float
262+
%13 = insertelement <4 x float> %9, float %12, i32 1
263+
%14 = trunc i64 %6 to i32
264+
%15 = bitcast i32 %14 to float
265+
%16 = insertelement <4 x float> %13, float %15, i32 2
266+
; Inserts %15 into lane 3 of %13 (not %16); see the review note above the define.
%17 = insertelement <4 x float> %13, float %15, i32 3
267+
ret <4 x float> %17
268+
}
269+
270+
; Builds a <4 x float> from two i64 loads: the first (at the base address,
; align 16) is split with trunc / lshr 32 into lanes 0 and 1, and the second
; (at element 2, align 8) is split the same way into lanes 2 and 3. Each
; insertelement chains from the previous one, so all four lanes of the
; returned vector are defined. The CHECK lines below give the expected
; assembly for the SSE2, SSSE3, SSE41 and AVX check prefixes.
define <4 x float> @load_float4_float3_trunc_0123(<4 x float>* nocapture readonly dereferenceable(16)) {
271+
; SSE2-LABEL: load_float4_float3_trunc_0123:
272+
; SSE2: # %bb.0:
273+
; SSE2-NEXT: movaps (%rdi), %xmm0
274+
; SSE2-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
275+
; SSE2-NEXT: retq
276+
;
277+
; SSSE3-LABEL: load_float4_float3_trunc_0123:
278+
; SSSE3: # %bb.0:
279+
; SSSE3-NEXT: movaps (%rdi), %xmm0
280+
; SSSE3-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
281+
; SSSE3-NEXT: retq
282+
;
283+
; SSE41-LABEL: load_float4_float3_trunc_0123:
284+
; SSE41: # %bb.0:
285+
; SSE41-NEXT: movaps (%rdi), %xmm0
286+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
287+
; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
288+
; SSE41-NEXT: retq
289+
;
290+
; AVX-LABEL: load_float4_float3_trunc_0123:
291+
; AVX: # %bb.0:
292+
; AVX-NEXT: vmovaps (%rdi), %xmm0
293+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
294+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
295+
; AVX-NEXT: retq
296+
%2 = bitcast <4 x float>* %0 to i64*
297+
%3 = load i64, i64* %2, align 16
298+
%4 = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0, i64 2
299+
%5 = bitcast float* %4 to i64*
300+
%6 = load i64, i64* %5, align 8
301+
%7 = trunc i64 %3 to i32
302+
%8 = bitcast i32 %7 to float
303+
%9 = insertelement <4 x float> undef, float %8, i32 0
304+
%10 = lshr i64 %3, 32
305+
%11 = trunc i64 %10 to i32
306+
%12 = bitcast i32 %11 to float
307+
%13 = insertelement <4 x float> %9, float %12, i32 1
308+
%14 = trunc i64 %6 to i32
309+
%15 = bitcast i32 %14 to float
310+
%16 = insertelement <4 x float> %13, float %15, i32 2
311+
%17 = lshr i64 %6, 32
312+
%18 = trunc i64 %17 to i32
313+
%19 = bitcast i32 %18 to float
314+
%20 = insertelement <4 x float> %16, float %19, i32 3
315+
ret <4 x float> %20
316+
}
317+
106318
; PR21780
107319
define <4 x double> @load_double4_0u2u(double* nocapture readonly dereferenceable(32)) {
108320
; SSE2-LABEL: load_double4_0u2u:

0 commit comments

Comments
 (0)