Open
Description
Compiling the following C++ snippet with `clang++ -O3` and `g++ -O3` (see here):
#include <vector>
// Minimal reproducer: push a single element into a default-constructed
// vector and read it back. Returns the element that was pushed (314).
unsigned foo() {
std::vector<unsigned> a;
a.push_back(314); // first push into an empty vector; the listing below shows the resulting new/delete pair
return a[0];
}
generates this assembly on x86_64:
# Compiler output for the C++ foo(): the vector is reduced to one
# operator new / operator delete pair and the result is a constant.
foo(): # @foo()
push rax # moves rsp down by 8; undone by `pop rcx` below (presumably for call-site stack alignment)
mov edi, 4 # argument to operator new: request 4 bytes
call operator new(unsigned long)
mov rdi, rax # hand the pointer just returned straight to operator delete
call operator delete(void*)
mov eax, 314 # return value: the constant pushed in the C++ source
pop rcx # discard the slot pushed on entry
ret
(Note: clang generates perfect assembly even with multiple `push_back` calls; the only thing that seems to trip it up is a reallocation.)
This snippet, compiled with `rustc -C opt-level=3` (see here):
// Minimal reproducer: push a single element into an empty `Vec` and read
// it back by index. Returns the element that was pushed (0).
pub fn foo() -> u32 {
let mut v: Vec<u32> = Vec::new();
v.push(0);
v[0] // indexed read; the listing below still contains a panic_bounds_check call for it
}
generates the following assembly:
# Out-of-line growth routine called from example::foo with rdi pointing at
# a structure on the caller's stack. It either reallocates the existing
# buffer to twice its size or, when the field at offset 8 is zero, makes a
# fresh 16-byte allocation, then stores the new pointer and count back.
# NOTE(review): offset 0 looks like the buffer pointer and offset 8 like the
# capacity in 4-byte elements; this is inferred from how they are used here.
<alloc::raw_vec::RawVec<T, A>>::double:
push rbp
mov rbp, rsp
push r14
push rbx
sub rsp, 64
mov r14, rdi # r14 = pointer to the structure; callee-saved, so it survives the calls below
mov rbx, qword ptr [r14 + 8] # rbx = field at offset 8
test rbx, rbx
je .LBB0_6 # zero: take the fresh-allocation path
lea rsi, [4*rbx] # rsi = 4 * rbx
lea rcx, [8*rbx] # rcx = 8 * rbx, i.e. twice rsi
mov rdi, qword ptr [r14] # rdi = field at offset 0
lea r9, [rbp - 40] # r9 = address of a stack slot that the failure path reads back
mov edx, 4
mov r8d, 4
call __rust_realloc@PLT # presumably (ptr, old size, old align, new size, new align, error out) - TODO confirm
test rax, rax
je .LBB0_4 # null result: realloc failure path
add rbx, rbx # double the value that is stored back at offset 8
jmp .LBB0_3
.LBB0_6:
lea rdx, [rbp - 40] # rdx = address of the same stack slot
mov edi, 16
mov esi, 4
call __rust_alloc@PLT # presumably (size = 16, align = 4, error out) - TODO confirm
test rax, rax
je .LBB0_8 # null result: alloc failure path
mov ebx, 4 # value stored at offset 8 after a fresh allocation
.LBB0_3:
# Success: write back the new pointer (rax) and the new offset-8 value (rbx).
mov qword ptr [r14], rax
mov qword ptr [r14 + 8], rbx
add rsp, 64
pop rbx
pop r14
pop rbp
ret
.LBB0_4:
# Realloc failed. The qword at [rbp - 40] is loaded and stored back
# unchanged, and the 16 bytes at [rbp - 32] are bounced through [rbp - 64].
mov rax, qword ptr [rbp - 40]
movups xmm0, xmmword ptr [rbp - 32]
movaps xmmword ptr [rbp - 64], xmm0
mov qword ptr [rbp - 40], rax
movaps xmm0, xmmword ptr [rbp - 64]
jmp .LBB0_5
.LBB0_8:
# Alloc failed. The 16 bytes at [rbp - 32] are bounced through two stack
# temporaries ([rbp - 64], then [rbp - 80]) before being written back.
movups xmm0, xmmword ptr [rbp - 32]
movaps xmmword ptr [rbp - 64], xmm0
movaps xmm0, xmmword ptr [rbp - 64]
movaps xmmword ptr [rbp - 80], xmm0
movaps xmm0, xmmword ptr [rbp - 80]
.LBB0_5:
movups xmmword ptr [rbp - 32], xmm0 # xmm0 still holds the value loaded from [rbp - 32] above
lea rdi, [rbp - 40] # argument: address of the 24-byte stack slot
call <alloc::heap::Heap as alloc::allocator::Alloc>::oom # nothing follows the call; presumably it does not return
# Cleanup routine: given rdi pointing at a structure, frees the buffer at
# offset 0 unless the count at offset 8 is zero. In this listing it is
# called only from the code that follows the panic call in example::foo.
core::ptr::drop_in_place:
push rbp
mov rbp, rsp
mov rsi, qword ptr [rdi + 8] # rsi = field at offset 8
test rsi, rsi
je .LBB1_1 # zero: skip the dealloc call
mov rdi, qword ptr [rdi] # rdi = field at offset 0
shl rsi, 2 # rsi *= 4
mov edx, 4
pop rbp # restore the frame first so the jump below is a tail call
jmp __rust_dealloc@PLT # presumably (ptr, size in bytes, align = 4) - TODO confirm
.LBB1_1:
pop rbp
ret
# Out-of-memory handler: copies the 24 bytes at rdi into a local at
# [rbp - 32] and passes the address of that copy to __rust_oom.
<alloc::heap::Heap as alloc::allocator::Alloc>::oom:
push rbp
mov rbp, rsp
sub rsp, 32
mov rax, qword ptr [rdi + 16] # bytes 16..23 of the argument
mov qword ptr [rbp - 16], rax
movups xmm0, xmmword ptr [rdi] # bytes 0..15 of the argument
movaps xmmword ptr [rbp - 32], xmm0
lea rdi, [rbp - 32] # argument: address of the local copy
call __rust_oom@PLT # nothing follows the call; presumably it does not return
# rustc output for the Vec::new() reproducer. A 24-byte structure is built
# on the stack and used as follows:
#   [rbp - 32]  loaded as the buffer pointer
#   [rbp - 24]  count scaled by 4 and passed to __rust_dealloc
#   [rbp - 16]  index used for the store, then incremented
# The growth routine is always called, the element is stored and read back
# through memory, and a bounds-check panic path plus unwind cleanup remain.
example::foo:
push rbp
mov rbp, rsp
push rbx
sub rsp, 24
mov qword ptr [rbp - 32], 4 # initial value of the pointer field
xorps xmm0, xmm0
movups xmmword ptr [rbp - 24], xmm0 # zero the other two fields (16 bytes)
lea rdi, [rbp - 32] # argument: address of the structure
call <alloc::raw_vec::RawVec<T, A>>::double
mov rdi, qword ptr [rbp - 32] # rdi = buffer pointer written by the call
mov rax, qword ptr [rbp - 16] # rax = current index
mov dword ptr [rdi + 4*rax], 0 # store the pushed value (0) at that index
inc rax
mov qword ptr [rbp - 16], rax # mov leaves the flags from `inc` untouched
je .LBB3_2 # taken only if the incremented value is zero
mov ebx, dword ptr [rdi] # ebx = element 0; callee-saved, so it survives the dealloc call
mov rsi, qword ptr [rbp - 24]
test rsi, rsi
je .LBB3_6 # zero: nothing to free
shl rsi, 2 # rsi *= 4
mov edx, 4
call __rust_dealloc@PLT # rdi still holds the buffer pointer loaded above
.LBB3_6:
mov eax, ebx # return value: the element read back
add rsp, 24
pop rbx
pop rbp
ret
.LBB3_2:
# Bounds-check failure path: panic with the location record and two zero arguments.
lea rdi, [rip + panic_bounds_check_loc.2]
xor esi, esi
xor edx, edx
call core::panicking::panic_bounds_check@PLT
# NOTE(review): the instructions below look like the unwind landing pad
# described by GCC_except_table3 - confirm. They save rax, run
# drop_in_place on the local structure, then resume unwinding.
mov rbx, rax
lea rdi, [rbp - 32]
call core::ptr::drop_in_place
mov rdi, rbx
call _Unwind_Resume@PLT
# Data emitted alongside example::foo: an exception table, the source-path
# string, the location record passed to panic_bounds_check, and a pointer
# to the personality routine.
# NOTE(review): the table bytes below are not decoded here; the entries are
# offsets between .Ltmp/.Lfunc labels that this listing does not show.
GCC_except_table3:
.byte 255
.byte 155
.asciz "\234"
.byte 3
.byte 26
.long .Ltmp29-.Lfunc_begin3
.long .Ltmp32-.Ltmp29
.long .Ltmp33-.Lfunc_begin3
.byte 0
.long .Ltmp32-.Lfunc_begin3
.long .Lfunc_end3-.Ltmp32
.long 0
.byte 0
str.1:
.ascii "/checkout/src/liballoc/vec.rs" # 29 characters, matching the .quad 29 below
panic_bounds_check_loc.2:
.quad str.1 # address of the path string above
.quad 29 # length of that string
.long 1555 # presumably the source line - TODO confirm
.long 10 # presumably the source column - TODO confirm
DW.ref.rust_eh_personality:
.quad rust_eh_personality
I've tried passing `-C lto` and `-C panic=abort` to rustc without much luck. I've also tried replacing `v[0]` with `unsafe { *v.get_unchecked(0) }`, which did not help either. The only thing that makes it generate good assembly is using `Vec::with_capacity(N)` (see here):
// Variant of the reproducer that reserves capacity up front, so neither
// push has to grow the buffer. Returns the second element pushed (4).
pub fn foo() -> u32 {
let mut v: Vec<u32> = Vec::with_capacity(3);
v.push(7);
v.push(4);
v[1] // second element, i.e. 4
}
generates
# rustc output for the with_capacity variant: no allocation, store, or
# bounds check remains; the function just returns a constant.
example::foo:
push rbp
mov rbp, rsp
mov eax, 4 # return value: the 4 that the source reads back as v[1]
pop rbp
ret
Metadata
Metadata
Assignees
Labels
- Area: Code generation
- Category: An issue proposing an enhancement or a PR with one.
- Category: An issue highlighting optimization opportunities or PRs implementing such
- Issue: Problems and improvements with respect to performance of generated code.
- Relevant to the compiler team, which will review and decide on the PR/issue.