Skip to content

rust can't serialize 11 fields efficiently #45068

Open
@jrmuizel

Description

@jrmuizel

Using noalias (#45012) lets Rust generate much better code for the serialization of 10 fields in good_bake_bytes(); however, it falls back to terrible code with the 11 fields of bad_bake_bytes().

use std::io::Write;
use std::{io, ptr};

/// A [`Write`] adapter that appends to a `Vec<u8>` without ever growing it.
///
/// `write` does not reserve space: it raises the vector's length and copies
/// the bytes straight into the existing allocation. The code that creates the
/// writer must therefore make sure the wrapped vector already has enough spare
/// capacity for everything that will be written through it.
struct UnsafeVecWriter<'a>(&'a mut Vec<u8>);

impl<'a> Write for UnsafeVecWriter<'a> {
    /// Appends all of `buf` to the wrapped vector and returns `Ok(buf.len())`.
    ///
    /// # Panics
    ///
    /// In builds with debug assertions enabled, panics if the vector's spare
    /// capacity is smaller than `buf.len()`. Builds without debug assertions
    /// perform no check at all.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let old_len = self.0.len();
        // Compiled out when debug assertions are off, so the unchecked fast
        // path is unchanged there; elsewhere it turns an out-of-bounds write
        // into a panic.
        debug_assert!(
            self.0.capacity() - old_len >= buf.len(),
            "UnsafeVecWriter: insufficient spare capacity ({} bytes needed, {} available)",
            buf.len(),
            self.0.capacity() - old_len
        );
        // SAFETY: relies on the type-level contract that the vector has at
        // least `buf.len()` bytes of spare capacity, which makes
        // `old_len + buf.len()` a valid length and keeps the destination range
        // inside the allocation. `buf` is a shared borrow while the vector is
        // borrowed mutably, so the two ranges cannot overlap. The statement
        // order (length first, then copy) is kept exactly as it was.
        unsafe {
            self.0.set_len(old_len + buf.len());
            ptr::copy_nonoverlapping(buf.as_ptr(), self.0.as_mut_ptr().add(old_len), buf.len());
        }
        Ok(buf.len())
    }

    /// Nothing is buffered, so flushing is a no-op that always succeeds.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

/// Input record for the serialization functions: eleven `f32` values held in
/// a single tuple field.
struct Entity {
    /// The eleven values, accessed positionally as `o.0` through `o.10`.
    o: (f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32),
}

use std::mem::transmute;
/// Writes the four native-endian bytes of `x` to `w` with a single `write` call.
///
/// This function has no way to report failure, so the `io::Result` returned by
/// `write` is discarded: an error or a short write goes unnoticed by the caller.
fn do_f32<W: Write>(w: &mut W, x: f32) {
    // Safe equivalent of transmuting `[x]` to `[u8; 4]`: the value's bytes in
    // native byte order.
    let bytes = x.to_ne_bytes();
    // Discarded on purpose (see the doc comment); `let _ =` makes that explicit
    // instead of leaving an unused `#[must_use]` value.
    let _ = w.write(&bytes);
}

/// Serializes all 11 fields of `e.o` into `vec`, one `do_f32` call per field,
/// in field order (`o.0` through `o.10`).
///
/// This is the variant the issue reports as compiling to poor code.
// `inline(never)` keeps this a separate function — presumably so that its
// generated assembly can be inspected on its own.
#[inline(never)]
fn bad_bake_bytes(vec: &mut Vec<u8>, e: &Entity) {
    // Every write goes through `UnsafeVecWriter`, which appends to `vec`
    // without reserving capacity.
    let w = &mut UnsafeVecWriter(vec);
    do_f32(w, e.o.0);
    do_f32(w, e.o.1);
    do_f32(w, e.o.2);
    do_f32(w, e.o.3);
    do_f32(w, e.o.4);
    do_f32(w, e.o.5);
    do_f32(w, e.o.6);
    do_f32(w, e.o.7);
    do_f32(w, e.o.8);
    do_f32(w, e.o.9);
    // The 11th write is the only difference from `good_bake_bytes`.
    do_f32(w, e.o.10);
}

/// Serializes only the first 10 fields of `e.o` into `vec`, one `do_f32` call
/// per field, in field order (`o.0` through `o.9`).
///
/// This is the variant the issue reports as compiling to much better code.
// `inline(never)` keeps this a separate function — presumably so that its
// generated assembly can be inspected on its own.
#[inline(never)]
fn good_bake_bytes(vec: &mut Vec<u8>, e: &Entity) {
    // Every write goes through `UnsafeVecWriter`, which appends to `vec`
    // without reserving capacity.
    let w = &mut UnsafeVecWriter(vec);
    do_f32(w, e.o.0);
    do_f32(w, e.o.1);
    do_f32(w, e.o.2);
    do_f32(w, e.o.3);
    do_f32(w, e.o.4);
    do_f32(w, e.o.5);
    do_f32(w, e.o.6);
    do_f32(w, e.o.7);
    do_f32(w, e.o.8);
    do_f32(w, e.o.9);
    // Left disabled on purpose: enabling this 11th write makes the function
    // identical to `bad_bake_bytes`, the slow case being reported.
    //do_f32(w, e.o.10);
}

fn main() {
    let mut encoded = Vec::new();
    let decoded: Entity = unsafe { std::mem::uninitialized() };
    bad_bake_bytes(&mut encoded, &decoded);
    good_bake_bytes(&mut encoded, &decoded);
}
## Compiler output for bad_bake_bytes (11 writes).
## The same six-instruction group is repeated for source offsets 0, 4, ..., 40:
## every group re-reads 16(%rdi), adds 4, writes it back, and re-reads (%rdi)
## before storing. Nothing is kept in registers from one write to the next.
__ZN10serde_fast14bad_bake_bytes17h506e94e6df0b1a3bE:
	.cfi_startproc
	pushq	%rbp
Lcfi0:
	.cfi_def_cfa_offset 16
Lcfi1:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi2:
	.cfi_def_cfa_register %rbp
	## Write 1 of 11: load 4 bytes from offset 0 of the second argument (%rsi).
	movl	(%rsi), %eax
	## Read the current value at 16(%rdi), store it back increased by 4
	## (matches the set_len call in the Rust source).
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	## Reload the base pointer from (%rdi) and store at base + old value.
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	## Writes 2 through 11 repeat the group above with the next source offset.
	movl	4(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	8(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	12(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	16(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	20(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	24(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	28(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	32(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	36(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	movl	40(%rsi), %eax
	movq	16(%rdi), %rcx
	leaq	4(%rcx), %rdx
	movq	%rdx, 16(%rdi)
	movq	(%rdi), %rdx
	movl	%eax, (%rdx,%rcx)
	popq	%rbp
	retq
	.cfi_endproc

	.p2align	4, 0x90
## Compiler output for good_bake_bytes (10 writes).
## (%rdi) and 16(%rdi) are each read once and kept in %rcx / %rdx; the ten
## stores then use constant displacements 0, 4, ..., 36 from that base, and
## 16(%rdi) is written back a single time as the old value plus 40.
__ZN10serde_fast15good_bake_bytes17h3098644f875a0da3E:
	.cfi_startproc
	pushq	%rbp
Lcfi3:
	.cfi_def_cfa_offset 16
Lcfi4:
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
Lcfi5:
	.cfi_def_cfa_register %rbp
	movl	(%rsi), %eax
	## The only loads from the first argument: base pointer and current offset.
	movq	(%rdi), %rcx
	movq	16(%rdi), %rdx
	movl	%eax, (%rcx,%rdx)
	## Remaining fields: plain load/store pairs, no reloads through %rdi.
	movl	4(%rsi), %eax
	movl	%eax, 4(%rcx,%rdx)
	movl	8(%rsi), %eax
	movl	%eax, 8(%rcx,%rdx)
	movl	12(%rsi), %eax
	movl	%eax, 12(%rcx,%rdx)
	movl	16(%rsi), %eax
	movl	%eax, 16(%rcx,%rdx)
	movl	20(%rsi), %eax
	movl	%eax, 20(%rcx,%rdx)
	movl	24(%rsi), %eax
	movl	%eax, 24(%rcx,%rdx)
	movl	28(%rsi), %eax
	movl	%eax, 28(%rcx,%rdx)
	movl	32(%rsi), %eax
	movl	%eax, 32(%rcx,%rdx)
	movl	36(%rsi), %eax
	## Single combined update: old value + 40 (10 writes x 4 bytes) stored back
	## to 16(%rdi). %rsi is reused as scratch here, after its last use as a
	## source pointer.
	leaq	40(%rdx), %rsi
	movq	%rsi, 16(%rdi)
	movl	%eax, 36(%rcx,%rdx)
	popq	%rbp
	retq
	.cfi_endproc

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM (Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.) · C-enhancement (Category: An issue proposing an enhancement or a PR with one.) · C-optimization (Category: An issue highlighting optimization opportunities or PRs implementing such) · I-slow (Issue: Problems and improvements with respect to performance of generated code.) · T-compiler (Relevant to the compiler team, which will review and decide on the PR/issue.) · WG-llvm (Working group: LLVM backend code generation)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions