Edit: As pointed out by @oyvindln, the problem appears to be a regression introduced in 1.20.
Demonstration: https://godbolt.org/g/5uuzVL
Version: rustc 1.22.0-nightly (277476c 2017-09-16), with `-C opt-level=3 -C target-cpu=native`
Code:
```rust
use std::rc::Rc;
use std::cell::RefCell;

pub struct Buffer {
    buf: Vec<u8>,
    pool: Rc<RefCell<Vec<Vec<u8>>>>,
}

impl Drop for Buffer {
    fn drop(&mut self) {
        self.pool.borrow_mut()
            .push(std::mem::replace(&mut self.buf, vec![]));
    }
}
```
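For context, here is a minimal sketch of how such a pooled buffer might be used; the `from_pool` constructor and the `main` function are illustrative assumptions and not part of the original report:

```rust
use std::cell::RefCell;
use std::rc::Rc;

// Same definitions as above, repeated so the sketch compiles on its own.
pub struct Buffer {
    buf: Vec<u8>,
    pool: Rc<RefCell<Vec<Vec<u8>>>>,
}

impl Buffer {
    // Hypothetical constructor: reuse a pooled allocation if one is available.
    fn from_pool(pool: Rc<RefCell<Vec<Vec<u8>>>>) -> Buffer {
        let buf = pool.borrow_mut().pop().unwrap_or_default();
        Buffer { buf, pool }
    }
}

impl Drop for Buffer {
    fn drop(&mut self) {
        // On drop, the allocation is handed back to the pool.
        self.pool
            .borrow_mut()
            .push(std::mem::replace(&mut self.buf, vec![]));
    }
}

fn main() {
    let pool = Rc::new(RefCell::new(Vec::new()));
    {
        let mut b = Buffer::from_pool(pool.clone());
        b.buf.extend_from_slice(b"hello");
    } // `b` is dropped here; its Vec goes back into the pool.
    assert_eq!(pool.borrow().len(), 1);
}
```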
Expected result: Optimal code would move `self.buf` directly into `self.pool` and then reset `self.buf` in place. Acceptable code would move `self.buf` into a temporary on the stack, move the temporary into `self.pool`, and reset `self.buf` in place.
Observed result:

1. Space for two `Vec<u8>` (24 bytes each) is allocated on the stack, at `-48(%rbp)` (A) and `-96(%rbp)` (B).
2. `self.buf` is copied to A.
3. `self.buf` is reset in place.
4. A is copied to B.
5. B is copied to A.
6. A is inserted into `self.pool`.
Steps 4 and 5 are completely unnecessary copies of 48 bytes and could safely be removed. Replacing `std::mem::replace` with an equivalent `std::mem::swap` call produces slightly different code with the same basic problem.
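For reference, the swap-based variant mentioned above would look roughly like the following sketch (an equivalent formulation, not code taken verbatim from the report):

```rust
use std::cell::RefCell;
use std::mem;
use std::rc::Rc;

pub struct Buffer {
    buf: Vec<u8>,
    pool: Rc<RefCell<Vec<Vec<u8>>>>,
}

impl Drop for Buffer {
    fn drop(&mut self) {
        // Move the contents of self.buf into a stack temporary, leaving an
        // empty Vec behind, then push the temporary into the pool.
        // Semantically this is the same as the mem::replace version above.
        let mut tmp = Vec::new();
        mem::swap(&mut self.buf, &mut tmp);
        self.pool.borrow_mut().push(tmp);
    }
}
```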
Compiler output:
```asm
<example::Buffer as core::ops::drop::Drop>::drop:
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %rbx
        subq    $88, %rsp
        movq    %rdi, %rax
        movq    24(%rax), %rbx
        cmpq    $0, 16(%rbx)
        jne     .LBB6_6
        leaq    16(%rbx), %rcx
        movq    $-1, 16(%rbx)
        leaq    24(%rbx), %rdi
        movq    %rdi, -64(%rbp)
        movq    %rcx, -56(%rbp)
;2: self.buf is copied to A
        movq    16(%rax), %rcx
        movq    %rcx, -32(%rbp)
        vmovups (%rax), %xmm0
        vmovaps %xmm0, -48(%rbp)
;3: self.buf is reset in place
        movq    $1, (%rax)
        vxorps  %xmm0, %xmm0, %xmm0
        vmovups %xmm0, 8(%rax)
;4: A is copied to B
        movq    -32(%rbp), %rax
        movq    %rax, -80(%rbp)
        vmovaps -48(%rbp), %xmm0
        vmovaps %xmm0, -96(%rbp)
;5: B is copied to A
        movq    -80(%rbp), %rax
        movq    %rax, -32(%rbp)
        vmovaps -96(%rbp), %xmm0
        vmovaps %xmm0, -48(%rbp)
;6: A is pushed into self.pool
        movq    40(%rbx), %rax
        cmpq    32(%rbx), %rax
        jne     .LBB6_4
        callq   <alloc::raw_vec::RawVec<T, A>>::double
        movq    40(%rbx), %rax
.LBB6_4:
        movq    24(%rbx), %rcx
        leaq    (%rax,%rax,2), %rax
        movq    -32(%rbp), %rdx
        movq    %rdx, 16(%rcx,%rax,8)
        vmovaps -48(%rbp), %xmm0
        vmovups %xmm0, (%rcx,%rax,8)
        incq    40(%rbx)
        movq    $0, 16(%rbx)
        addq    $88, %rsp
        popq    %rbx
        popq    %rbp
        retq
.LBB6_6:
        callq   core::result::unwrap_failed
        movq    %rax, %rbx
        leaq    -48(%rbp), %rdi
        callq   core::ptr::drop_in_place
        leaq    -64(%rbp), %rdi
        callq   core::ptr::drop_in_place
        movq    %rbx, %rdi
        callq   _Unwind_Resume@PLT
```