Open
Description
Using noalias (#45012) lets rust generate much better code for the serialization of 10 fields in good_bake_bytes() however it falls back to terrible with the 11 fields of bad_bake_bytes()
use std::io::Write;
use std::{io, ptr};
struct UnsafeVecWriter<'a>(&'a mut Vec<u8>);
impl<'a> Write for UnsafeVecWriter<'a> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
unsafe {
let old_len = self.0.len();
self.0.set_len(old_len + buf.len());
ptr::copy_nonoverlapping(buf.as_ptr(), self.0.as_mut_ptr().offset(old_len as isize), buf.len());
}
Ok(buf.len())
}
fn flush(&mut self) -> io::Result<()> { Ok(()) }
}
struct Entity {
o: (f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32),
}
use std::mem::transmute;
fn do_f32<W: Write>(w: &mut W, x: f32) {
unsafe {
let p: [u8; 4] = std::mem::transmute([x]);
w.write(&p);
}
}
#[inline(never)]
fn bad_bake_bytes(vec: &mut Vec<u8>, e: &Entity) {
let w = &mut UnsafeVecWriter(vec);
do_f32(w, e.o.0);
do_f32(w, e.o.1);
do_f32(w, e.o.2);
do_f32(w, e.o.3);
do_f32(w, e.o.4);
do_f32(w, e.o.5);
do_f32(w, e.o.6);
do_f32(w, e.o.7);
do_f32(w, e.o.8);
do_f32(w, e.o.9);
do_f32(w, e.o.10);
}
#[inline(never)]
fn good_bake_bytes(vec: &mut Vec<u8>, e: &Entity) {
let w = &mut UnsafeVecWriter(vec);
do_f32(w, e.o.0);
do_f32(w, e.o.1);
do_f32(w, e.o.2);
do_f32(w, e.o.3);
do_f32(w, e.o.4);
do_f32(w, e.o.5);
do_f32(w, e.o.6);
do_f32(w, e.o.7);
do_f32(w, e.o.8);
do_f32(w, e.o.9);
//do_f32(w, e.o.10);
}
fn main() {
let mut encoded = Vec::new();
let decoded: Entity = unsafe { std::mem::uninitialized() };
bad_bake_bytes(&mut encoded, &decoded);
good_bake_bytes(&mut encoded, &decoded);
}
__ZN10serde_fast14bad_bake_bytes17h506e94e6df0b1a3bE:
.cfi_startproc
pushq %rbp
Lcfi0:
.cfi_def_cfa_offset 16
Lcfi1:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi2:
.cfi_def_cfa_register %rbp
movl (%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 4(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 8(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 12(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 16(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 20(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 24(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 28(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 32(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 36(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
movl 40(%rsi), %eax
movq 16(%rdi), %rcx
leaq 4(%rcx), %rdx
movq %rdx, 16(%rdi)
movq (%rdi), %rdx
movl %eax, (%rdx,%rcx)
popq %rbp
retq
.cfi_endproc
.p2align 4, 0x90
__ZN10serde_fast15good_bake_bytes17h3098644f875a0da3E:
.cfi_startproc
pushq %rbp
Lcfi3:
.cfi_def_cfa_offset 16
Lcfi4:
.cfi_offset %rbp, -16
movq %rsp, %rbp
Lcfi5:
.cfi_def_cfa_register %rbp
movl (%rsi), %eax
movq (%rdi), %rcx
movq 16(%rdi), %rdx
movl %eax, (%rcx,%rdx)
movl 4(%rsi), %eax
movl %eax, 4(%rcx,%rdx)
movl 8(%rsi), %eax
movl %eax, 8(%rcx,%rdx)
movl 12(%rsi), %eax
movl %eax, 12(%rcx,%rdx)
movl 16(%rsi), %eax
movl %eax, 16(%rcx,%rdx)
movl 20(%rsi), %eax
movl %eax, 20(%rcx,%rdx)
movl 24(%rsi), %eax
movl %eax, 24(%rcx,%rdx)
movl 28(%rsi), %eax
movl %eax, 28(%rcx,%rdx)
movl 32(%rsi), %eax
movl %eax, 32(%rcx,%rdx)
movl 36(%rsi), %eax
leaq 40(%rdx), %rsi
movq %rsi, 16(%rdi)
movl %eax, 36(%rcx,%rdx)
popq %rbp
retq
.cfi_endproc
Metadata
Metadata
Assignees
Labels
Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.Category: An issue proposing an enhancement or a PR with one.Category: An issue highlighting optimization opportunities or PRs implementing suchIssue: Problems and improvements with respect to performance of generated code.Relevant to the compiler team, which will review and decide on the PR/issue.Working group: LLVM backend code generation