Skip to content

Missed code elimination with std::mem::replace/std::mem::swap #44701

Open
@batonius

Description

@batonius

Edit: As pointed out by @oyvindln, the problem appears to be a regression introduced in 1.20.

Demonstration: https://godbolt.org/g/5uuzVL

Version: rustc 1.22.0-nightly (277476c 2017-09-16), -C opt-level=3 -C target-cpu=native

Code:

use std::rc::Rc;
use std::cell::RefCell;

/// A byte buffer that hands its storage to a shared pool when it is dropped.
pub struct Buffer {
    /// The bytes owned by this buffer; moved into `pool` by the `Drop` impl.
    buf: Vec<u8>,
    /// Shared, interior-mutable list of byte vectors that `Drop` pushes `buf` onto.
    pool: Rc<RefCell<Vec<Vec<u8>>>>,
}

impl Drop for Buffer {
    /// Moves `self.buf` onto the shared pool, leaving an empty `Vec` in its place.
    ///
    /// # Panics
    ///
    /// Panics if the pool's `RefCell` is already borrowed when `borrow_mut` is called.
    fn drop(&mut self) {
        // `drop` only has `&mut self`, so the vector cannot be moved out of the
        // field directly; `mem::replace` swaps in an empty `Vec` and returns the
        // old one by value, which is then pushed onto the pool.
        self.pool.borrow_mut()
            .push(std::mem::replace(&mut self.buf, vec![]));        
    }
}

Expected result: Optimal code would move self.buf directly into self.pool and then reset self.buf in-place. Acceptable code would move self.buf into a temporary on the stack, move the temporary into self.pool and reset self.buf in-place.

Observed result:

  1. Space for two Vec<u8> values (each 24 bytes) is allocated on the stack, at -48(%rbp) (A) and -96(%rbp) (B).
  2. self.buf is copied to A.
  3. self.buf is reset in-place.
  4. A is copied to B.
  5. B is copied to A.
  6. A is inserted into self.pool.

Steps 4 and 5 are completely unnecessary: together they copy 48 bytes (24 bytes from A to B and 24 bytes back) and could be safely removed. Replacing std::mem::replace with an equivalent std::mem::swap call produces slightly different code with the same basic problem.

Compiler output:

<example::Buffer as core::ops::drop::Drop>::drop:
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %rbx
        subq    $88, %rsp
        movq    %rdi, %rax
        movq    24(%rax), %rbx
        cmpq    $0, 16(%rbx)
        jne     .LBB6_6
        leaq    16(%rbx), %rcx
        movq    $-1, 16(%rbx)
        leaq    24(%rbx), %rdi
        movq    %rdi, -64(%rbp)
        movq    %rcx, -56(%rbp)
;2
        movq    16(%rax), %rcx
        movq    %rcx, -32(%rbp)
        vmovups (%rax), %xmm0
        vmovaps %xmm0, -48(%rbp)
;3
        movq    $1, (%rax)
        vxorps  %xmm0, %xmm0, %xmm0
        vmovups %xmm0, 8(%rax)
;4
        movq    -32(%rbp), %rax
        movq    %rax, -80(%rbp)
        vmovaps -48(%rbp), %xmm0
        vmovaps %xmm0, -96(%rbp)
;5
        movq    -80(%rbp), %rax
        movq    %rax, -32(%rbp)
        vmovaps -96(%rbp), %xmm0
        vmovaps %xmm0, -48(%rbp)
;6
        movq    40(%rbx), %rax
        cmpq    32(%rbx), %rax
        jne     .LBB6_4
        callq   <alloc::raw_vec::RawVec<T, A>>::double
        movq    40(%rbx), %rax
.LBB6_4:
        movq    24(%rbx), %rcx
        leaq    (%rax,%rax,2), %rax
        movq    -32(%rbp), %rdx
        movq    %rdx, 16(%rcx,%rax,8)
        vmovaps -48(%rbp), %xmm0
        vmovups %xmm0, (%rcx,%rax,8)
        incq    40(%rbx)
        movq    $0, 16(%rbx)
        addq    $88, %rsp
        popq    %rbx
        popq    %rbp
        retq
.LBB6_6:
        callq   core::result::unwrap_failed
        movq    %rax, %rbx
        leaq    -48(%rbp), %rdi
        callq   core::ptr::drop_in_place
        leaq    -64(%rbp), %rdi
        callq   core::ptr::drop_in_place
        movq    %rbx, %rdi
        callq   _Unwind_Resume@PLT

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-bug (Category: This is a bug.) · I-heavy (Issue: Problems and improvements with respect to binary size of generated code.) · I-slow (Issue: Problems and improvements with respect to performance of generated code.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions