Skip to content

Possibly sub-optimal optimization of few bytes copy #130304

Open
@leonardo-m

Description

@leonardo-m

This code contains three examples. foo0 and foo2 are optimized well (with a 4-byte mov instruction), while foo1 shows single-byte copies.

/// Writes the five ASCII bytes `False` to the start of `data` and returns
/// that 5-byte prefix.
///
/// Returns an empty slice, leaving `data` untouched, when `data` holds fewer
/// than five bytes.
///
/// In the assembly listing further down, the five single-byte assignments
/// are merged into one 4-byte store plus one 1-byte store.
pub fn foo0(data: &mut [u8]) -> &[u8] {
    // The length guard makes every constant index below in-bounds.
    if data.len() >= 5 {
        data[0] = b'F';
        data[1] = b'a';
        data[2] = b'l';
        data[3] = b's';
        data[4] = b'e';
        &data[.. 5]
    } else {
        &[]
    }
}

/// Like `foo0`, but with an extra branch for 4-byte buffers.
///
/// * `data.len() >= 5`: writes `False` and returns the 5-byte prefix.
/// * `data.len() == 4` (the only length that reaches the second branch):
///   writes `True` and returns the 4-byte prefix.
/// * otherwise: returns an empty slice and leaves `data` untouched.
///
/// This is the case the report is about: in the assembly listing further
/// down, the per-byte assignments are not merged into a single 4-byte store
/// in either branch.
pub fn foo1(data: &mut [u8]) -> &[u8] {
    if data.len() >= 5 {
        data[0] = b'F';
        data[1] = b'a';
        data[2] = b'l';
        data[3] = b's';
        data[4] = b'e';
        &data[.. 5]
    } else if data.len() >= 4 {
        // Reached only when the length is exactly 4, since >= 5 was handled above.
        data[0] = b'T';
        data[1] = b'r';
        data[2] = b'u';
        data[3] = b'e';
        &data[.. 4]
    } else {
        &[]
    }
}

/// Same observable behavior as `foo1`, expressed with `copy_from_slice`
/// instead of per-byte assignments.
///
/// * `data.len() >= 5`: copies `False` into the first five bytes and returns
///   that prefix.
/// * `data.len() == 4`: copies `True` into the four bytes and returns them.
/// * otherwise: returns an empty slice and leaves `data` untouched.
///
/// In the assembly listing further down, each copy becomes a 4-byte store
/// (plus a 1-byte store for the fifth byte of `False`).
pub fn foo2(data: &mut [u8]) -> &[u8] {
    if data.len() >= 5 {
        // Destination and source are both exactly 5 long, as copy_from_slice requires.
        data[.. 5].copy_from_slice(&[b'F', b'a', b'l', b's', b'e']);
        &data[.. 5]
    } else if data.len() >= 4 {
        // Reached only when the length is exactly 4.
        data[.. 4].copy_from_slice(&[b'T', b'r', b'u', b'e']);
        &data[.. 4]
    } else {
        &[]
    }
}

// Empty entry point; it does not call any of the functions in this snippet.
fn main() {}

Using the godbolt site with:

rustc 1.83.0-nightly (8d6b88b16 2024-09-11)
binary: rustc
commit-hash: 8d6b88b168e45ee1624699c19443c49665322a91
commit-date: 2024-09-11
host: x86_64-unknown-linux-gnu
release: 1.83.0-nightly
LLVM version: 19.1.0

Compilation using -C opt-level=3 and other aggressive optimization flags.

The asm is:

# Compiler output for foo0. NOTE(review): rdi/rsi presumably carry the slice
# pointer and length, and rax/rdx the returned slice pointer and length
# (System V x86-64 convention) — confirm against the ABI.
foo0:
        # Take the empty-slice path when the length is <= 4.
        cmp     rsi, 4
        jbe     .LBB0_1
        mov     rax, rdi
        # 1936482630 = 0x736C6146: the bytes 'F','a','l','s' in little-endian order.
        mov     dword ptr [rdi], 1936482630
        # 101 = 'e'
        mov     byte ptr [rdi + 4], 101
        # Returned length 5.
        mov     edx, 5
        ret
.LBB0_1:
        # Empty result: length 0. The pointer value 1 is presumably the
        # dangling, 1-aligned address used for an empty u8 slice.
        mov     eax, 1
        xor     edx, edx
        ret


# Compiler output for foo1 — the poorly merged case described in the report.
# NOTE(review): rdi/rsi presumably carry the slice pointer and length, and
# rax/rdx the returned slice pointer and length — confirm against the ABI.
foo1:
        mov     rax, rdi
        # Length <= 4 goes to .LBB1_1; otherwise fall through to the "False" path.
        cmp     rsi, 4
        jbe     .LBB1_1
        # 24902 = 0x6146: only 'F','a' are merged into a 2-byte store.
        mov     word ptr [rax], 24902
        # Returned length 5.
        mov     edx, 5
        # The remaining bytes go through materialised offsets and values:
        # rcx = 4, sil = 115 ('s'), rdi = 3, r8b = 108 ('l'), r9 = 2.
        mov     ecx, 4
        mov     sil, 115
        mov     edi, 3
        mov     r8b, 108
        mov     r9d, 2
        # data[2] = 'l'
        mov     byte ptr [rax + r9], r8b
        # data[3] = 's'
        mov     byte ptr [rax + rdi], sil
        # data[4] = 'e' (101)
        mov     byte ptr [rax + rcx], 101
        ret
.LBB1_1:
        # Flags still come from `cmp rsi, 4`: any length other than 4 is the empty case.
        jne     .LBB1_2
        # "True" path: four separate 1-byte stores. 84 = 'T'.
        mov     byte ptr [rax], 84
        # Returned length 4.
        mov     edx, 4
        # rcx = 3, sil = 117 ('u'), rdi = 2, r8b = 114 ('r'), r9 = 1.
        mov     ecx, 3
        mov     sil, 117
        mov     edi, 2
        mov     r8b, 114
        mov     r9d, 1
        # data[1] = 'r'
        mov     byte ptr [rax + r9], r8b
        # data[2] = 'u'
        mov     byte ptr [rax + rdi], sil
        # data[3] = 'e' (101)
        mov     byte ptr [rax + rcx], 101
        ret
.LBB1_2:
        # Empty result: length 0. The pointer value 1 is presumably the
        # dangling, 1-aligned address used for an empty u8 slice.
        mov     eax, 1
        xor     edx, edx
        ret


# Compiler output for foo2 (the copy_from_slice variant).
# NOTE(review): rdi/rsi presumably carry the slice pointer and length, and
# rax/rdx the returned slice pointer and length — confirm against the ABI.
foo2:
        mov     rax, rdi
        # Length <= 4 goes to .LBB2_1; otherwise fall through to the "False" path.
        cmp     rsi, 4
        jbe     .LBB2_1
        # 101 = 'e'
        mov     byte ptr [rax + 4], 101
        # 1936482630 = 0x736C6146: the bytes 'F','a','l','s' in little-endian order.
        mov     dword ptr [rax], 1936482630
        # Returned length 5.
        mov     edx, 5
        ret
.LBB2_1:
        # Flags still come from `cmp rsi, 4`: any length other than 4 is the empty case.
        jne     .LBB2_2
        # 1702195796 = 0x65757254: the bytes 'T','r','u','e' in one 4-byte store.
        mov     dword ptr [rax], 1702195796
        # Returned length 4.
        mov     edx, 4
        ret
.LBB2_2:
        # Empty result: length 0. The pointer value 1 is presumably the
        # dangling, 1-aligned address used for an empty u8 slice.
        mov     eax, 1
        xor     edx, edx
        ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-bug (Category: This is a bug.) · C-optimization (Category: An issue highlighting optimization opportunities or PRs implementing such) · I-heavy (Issue: Problems and improvements with respect to binary size of generated code.) · T-compiler (Relevant to the compiler team, which will review and decide on the PR/issue.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions