Closed
Description
Demo: https://godbolt.org/g/vqB6oj
I tried this code:
// Wrapper around a fixed-size array of sixteen `i32` values (16 * 4 = 64 bytes of data).
pub struct v {
// The sixteen 32-bit integer elements.
val:[i32;16]
}
// Element-wise addition: returns a new `v` whose element `i` is
// `a.val[i] + b.val[i]` for every index in 0..16.
// Both operands are taken by value and the result is returned by value.
pub fn test(a:v, b:v) -> v {
// Start from an all-zero result; every element is overwritten by the loop below.
let mut res = v { val : [0;16] };
// Constant trip count of 16, equal to the array length.
for i in 0..16 {
res.val[i] = a.val[i] + b.val[i];
}
return res;
}
I compiled it with:
rustc --crate-type=lib -C opt-level=3 -C target-cpu=skylake-avx512 --emit asm test.rs
I expected to see this happen:
vmovdqu32 zmm0, zmmword ptr [rsp + 72]
vpaddd zmm0, zmm0, zmmword ptr [rsp + 8]
vmovdqu32 zmmword ptr [rdi], zmm0
mov rax, rdi
vzeroupper
ret
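Instead, this happened (only the first 8 elements are added with a single 256-bit ymm `vpaddd`; the remaining 8 are added one at a time with scalar `movl`/`addl`, and the result is built on the stack and then copied out):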
movq $0, 56(%rsp)
vmovdqu (%rdx), %ymm0
vpaddd (%rsi), %ymm0, %ymm0
vmovdqu %ymm0, (%rsp)
movl 32(%rdx), %eax
addl 32(%rsi), %eax
movl %eax, 32(%rsp)
movl 36(%rdx), %eax
addl 36(%rsi), %eax
movl %eax, 36(%rsp)
movl 40(%rdx), %eax
addl 40(%rsi), %eax
movl %eax, 40(%rsp)
movl 44(%rdx), %eax
addl 44(%rsi), %eax
movl %eax, 44(%rsp)
movl 48(%rdx), %eax
addl 48(%rsi), %eax
movl %eax, 48(%rsp)
movl 52(%rdx), %eax
addl 52(%rsi), %eax
movl %eax, 52(%rsp)
movl 56(%rdx), %eax
addl 56(%rsi), %eax
movl %eax, 56(%rsp)
movl 60(%rdx), %eax
addl 60(%rsi), %eax
movl %eax, 60(%rsp)
vmovdqu (%rsp), %ymm0
vmovdqu 32(%rsp), %ymm1
vmovdqu %ymm1, 32(%rdi)
vmovdqu %ymm0, (%rdi)
movq %rdi, %rax
addq $64, %rsp
retq
Meta
~$ rustc --version --verbose
rustc 1.24.0 (4d90ac38c 2018-02-12)
binary: rustc
commit-hash: 4d90ac38c0b61bb69470b61ea2cccea0df48d9e5
commit-date: 2018-02-12
host: x86_64-unknown-linux-gnu
release: 1.24.0
LLVM version: 4.0