Closed
Description
Bugzilla Link | 49081 |
Version | trunk |
OS | Linux |
Depends On | #35080 |
CC | @alexey-bataev,@anton-afanasyev,@topperc,@RKSimon,@phoebewang,@rotateright |
Extended Description
/* 16-byte vector types (GCC/Clang vector extension): v4si holds int lanes,
   v4sf holds float lanes. */
typedef int v4si __attribute__((vector_size(16)));
typedef float v4sf __attribute__((vector_size(16)));
/* Reproducer: build a float vector from int lanes [1, 1, 2, 3] of the input.
   Lane 0 of the input is never read; lane 1 is used twice.
   Note: the parameter is also named `f`, so inside the body `f` refers to the
   vector argument, not to the function. */
v4sf f(v4si f)
{
/* Each lane is extracted and converted on its own; the whole expression is
   equivalent to one lane permute followed by one int->float vector convert. */
return (v4sf){(float)f[1], (float)f[1], (float)f[2], (float)f[3]};
}
With -O3, GCC outputs this:
# GCC -O3 code for f (Intel syntax): one shuffle, then one vector conversion.
# (Comments in this listing are reviewer annotations, not compiler output.)
f(int __vector(4)):
pshufd xmm0, xmm0, 229 # xmm0 = xmm0[1,1,2,3]  (229 = 0b11_10_01_01)
cvtdq2ps xmm0, xmm0 # convert all four int32 lanes to float in one instruction
ret # result is left in xmm0
LLVM outputs this instead, converting three separately shuffled copies of the input and then recombining them:
# LLVM code for the same function (Intel syntax): three shuffle+convert pairs,
# each contributing a single useful lane, then two shuffles to rebuild
# [1,1,2,3] and a final register move.
# (Comments on lines that had none are reviewer annotations, not compiler output.)
f(int __vector(4)):
pshufd xmm1, xmm0, 85 # xmm1 = xmm0[1,1,1,1]
cvtdq2ps xmm1, xmm1 # int -> float; only lane 0 (input element 1) reaches the result
pshufd xmm2, xmm0, 238 # xmm2 = xmm0[2,3,2,3]
cvtdq2ps xmm2, xmm2 # int -> float; only lane 0 (input element 2) reaches the result
pshufd xmm0, xmm0, 255 # xmm0 = xmm0[3,3,3,3]
cvtdq2ps xmm0, xmm0 # int -> float; only lane 0 (input element 3) reaches the result
unpcklps xmm2, xmm0 # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
shufps xmm1, xmm2, 64 # xmm1 = xmm1[0,0],xmm2[0,1]
movaps xmm0, xmm1 # move the assembled result (elements 1,1,2,3 as float) into xmm0
ret