Open
Description
In #74254 we observed that returning `expr.assume_init()` from a function unexpectedly prevents the return value from being constructed in place up front.
https://rust.godbolt.org/z/hr77qM
// `mem::uninitialized` (used in `b`) is deprecated; silence that lint for this example.
#![allow(deprecated)]
use std::mem::{self, MaybeUninit};
use std::ptr;
// Element type of the array returned by `a`, `b` and `c`.
type T = String;
// Number of elements in the returned array.
const N: usize = 2;
// fast: in the `example::a` listing the result is stored directly through
// the return pointer (`rdi`), with no intermediate copy.
// Baseline: builds the array through its `Default` impl.
pub fn a() -> [T; N] {
Default::default()
}
// fast: the `example::b` listing is identical to the one for `a`.
// Starts from an array obtained with `mem::uninitialized()`, writes a default
// value into every slot, and returns the array by value.
pub fn b() -> [T; N] {
unsafe {
// ignore the UB for now
// `mem::uninitialized` is deprecated (hence `#![allow(deprecated)]` at the top);
// this function serves as the point of comparison for `c`.
let mut array: [T; N] = mem::uninitialized();
for slot in &mut array {
// `ptr::write` stores the new value without reading or dropping what
// was previously in the slot.
ptr::write(slot, T::default());
}
array
}
}
// slow: in the `example::c` listing the array is first built in a stack
// temporary and then copied to the return pointer (`rdi`).
// Same construction as `b`, except that the storage is a `MaybeUninit<[T; N]>`
// and the returned value comes from `assume_init()`.
pub fn c() -> [T; N] {
let mut array: MaybeUninit<[T; N]> = MaybeUninit::uninit();
unsafe {
// ignore the UB for now
// ordinarily would cast to &mut [MaybeUninit<T>; N]
// but here we try to minimize difference from `b`
let slots = &mut *array.as_mut_ptr();
for slot in slots {
// `ptr::write` stores the new value without reading or dropping what
// was previously in the slot.
ptr::write(slot, T::default());
}
// Returning the result of `assume_init()` is the pattern this report is about.
array.assume_init()
}
}
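Notice that in the slow function the return value is constructed exactly as in both of the fast functions (6 instructions), except in the wrong place: it is built in a stack temporary and then relocated from [rsp-48] (relative to the stack pointer on entry; this is [rsp] in the listing, after `sub rsp, 48`) to [rdi] 😢 (12 instructions).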
example::a:
mov rax, rdi
mov qword ptr [rdi], 1
vxorps xmm0, xmm0, xmm0
vmovups xmmword ptr [rdi + 8], xmm0
mov qword ptr [rdi + 24], 1
vmovups xmmword ptr [rdi + 32], xmm0
ret
example::b:
mov rax, rdi
mov qword ptr [rdi], 1
vxorps xmm0, xmm0, xmm0
vmovups xmmword ptr [rdi + 8], xmm0
mov qword ptr [rdi + 24], 1
vmovups xmmword ptr [rdi + 32], xmm0
ret
example::c:
sub rsp, 48
mov rax, rdi
mov qword ptr [rsp], 1
vxorps xmm0, xmm0, xmm0
vmovups xmmword ptr [rsp + 8], xmm0
mov qword ptr [rsp + 24], 1
vmovups xmmword ptr [rsp + 32], xmm0
mov rcx, qword ptr [rsp]
mov qword ptr [rdi], rcx
vmovups xmm0, xmmword ptr [rsp + 8]
vmovups xmmword ptr [rdi + 8], xmm0
mov rcx, qword ptr [rsp + 24]
mov qword ptr [rdi + 24], rcx
mov rcx, qword ptr [rsp + 16]
mov qword ptr [rdi + 16], rcx
mov rcx, qword ptr [rsp + 24]
mov qword ptr [rdi + 24], rcx
vmovups xmm0, xmmword ptr [rsp + 32]
vmovups xmmword ptr [rdi + 32], xmm0
add rsp, 48
ret
Metadata
Metadata
Assignees
Labels
Area: Code generation
Category: An issue highlighting optimization opportunities or PRs implementing such
Call for participation: An issue has been fixed and does not reproduce, but no test has been added.
Issue: Problems and improvements with respect to performance of generated code.
Relevant to the compiler team, which will review and decide on the PR/issue.