Skip to content

Stop emitting one-at-a-time byte ops when swapping byte arrays #134946

Closed
@scottmcm

Description

@scottmcm

Mentioned on Discord https://discord.com/channels/273534239310479360/592856094527848449/1319367290902286367

Demo when swapping [u8; 44]: https://rust.godbolt.org/z/rznror9aG

Especially with opt-level 2, there's still a tail of one-byte-at-a-time swaps for bytes 32–43:

        movzx   eax, byte ptr [rdi + 32]
        movzx   ecx, byte ptr [rsi + 32]
        mov     byte ptr [rdi + 32], cl
        mov     byte ptr [rsi + 32], al
        movzx   eax, byte ptr [rdi + 33]
        movzx   ecx, byte ptr [rsi + 33]
        mov     byte ptr [rdi + 33], cl
        mov     byte ptr [rsi + 33], al
        movzx   eax, byte ptr [rdi + 34]
        movzx   ecx, byte ptr [rsi + 34]
        mov     byte ptr [rdi + 34], cl
        mov     byte ptr [rsi + 34], al
        movzx   eax, byte ptr [rdi + 35]
        movzx   ecx, byte ptr [rsi + 35]
        mov     byte ptr [rdi + 35], cl
        mov     byte ptr [rsi + 35], al
        movzx   eax, byte ptr [rdi + 36]
        movzx   ecx, byte ptr [rsi + 36]
        mov     byte ptr [rdi + 36], cl
        mov     byte ptr [rsi + 36], al
        movzx   eax, byte ptr [rdi + 37]
        movzx   ecx, byte ptr [rsi + 37]
        mov     byte ptr [rdi + 37], cl
        mov     byte ptr [rsi + 37], al
        movzx   eax, byte ptr [rdi + 38]
        movzx   ecx, byte ptr [rsi + 38]
        mov     byte ptr [rdi + 38], cl
        mov     byte ptr [rsi + 38], al
        movzx   eax, byte ptr [rdi + 39]
        movzx   ecx, byte ptr [rsi + 39]
        mov     byte ptr [rdi + 39], cl
        mov     byte ptr [rsi + 39], al
        movzx   eax, byte ptr [rdi + 40]
        movzx   ecx, byte ptr [rsi + 40]
        mov     byte ptr [rdi + 40], cl
        mov     byte ptr [rsi + 40], al
        movzx   eax, byte ptr [rdi + 41]
        movzx   ecx, byte ptr [rsi + 41]
        mov     byte ptr [rdi + 41], cl
        mov     byte ptr [rsi + 41], al
        movzx   eax, byte ptr [rdi + 42]
        movzx   ecx, byte ptr [rsi + 42]
        mov     byte ptr [rdi + 42], cl
        mov     byte ptr [rsi + 42], al
        movzx   eax, byte ptr [rdi + 43]
        movzx   ecx, byte ptr [rsi + 43]
        mov     byte ptr [rdi + 43], cl
        mov     byte ptr [rsi + 43], al
        ret

And in opt-level 3 the LLVM IR shows the same per-byte `i8` loads and stores for bytes 32–43:

  %x16.i.i.i = getelementptr inbounds i8, ptr %a, i64 32, !dbg !55
  %y18.i.i.i = getelementptr inbounds i8, ptr %b, i64 32, !dbg !62
  %a19.i.i.i = load i8, ptr %x16.i.i.i, align 1, !dbg !12
  %b20.i.i.i = load i8, ptr %y18.i.i.i, align 1, !dbg !44
  store i8 %b20.i.i.i, ptr %x16.i.i.i, align 1, !dbg !48
  store i8 %a19.i.i.i, ptr %y18.i.i.i, align 1, !dbg !53
  %x16.i.i.i.1 = getelementptr inbounds i8, ptr %a, i64 33, !dbg !55
  %y18.i.i.i.1 = getelementptr inbounds i8, ptr %b, i64 33, !dbg !62
  %a19.i.i.i.1 = load i8, ptr %x16.i.i.i.1, align 1, !dbg !12
  %b20.i.i.i.1 = load i8, ptr %y18.i.i.i.1, align 1, !dbg !44
  store i8 %b20.i.i.i.1, ptr %x16.i.i.i.1, align 1, !dbg !48
  store i8 %a19.i.i.i.1, ptr %y18.i.i.i.1, align 1, !dbg !53
  %x16.i.i.i.2 = getelementptr inbounds i8, ptr %a, i64 34, !dbg !55
  %y18.i.i.i.2 = getelementptr inbounds i8, ptr %b, i64 34, !dbg !62
  %a19.i.i.i.2 = load i8, ptr %x16.i.i.i.2, align 1, !dbg !12
  %b20.i.i.i.2 = load i8, ptr %y18.i.i.i.2, align 1, !dbg !44
  store i8 %b20.i.i.i.2, ptr %x16.i.i.i.2, align 1, !dbg !48
  store i8 %a19.i.i.i.2, ptr %y18.i.i.i.2, align 1, !dbg !53
  %x16.i.i.i.3 = getelementptr inbounds i8, ptr %a, i64 35, !dbg !55
  %y18.i.i.i.3 = getelementptr inbounds i8, ptr %b, i64 35, !dbg !62
  %a19.i.i.i.3 = load i8, ptr %x16.i.i.i.3, align 1, !dbg !12
  %b20.i.i.i.3 = load i8, ptr %y18.i.i.i.3, align 1, !dbg !44
  store i8 %b20.i.i.i.3, ptr %x16.i.i.i.3, align 1, !dbg !48
  store i8 %a19.i.i.i.3, ptr %y18.i.i.i.3, align 1, !dbg !53
  %x16.i.i.i.4 = getelementptr inbounds i8, ptr %a, i64 36, !dbg !55
  %y18.i.i.i.4 = getelementptr inbounds i8, ptr %b, i64 36, !dbg !62
  %a19.i.i.i.4 = load i8, ptr %x16.i.i.i.4, align 1, !dbg !12
  %b20.i.i.i.4 = load i8, ptr %y18.i.i.i.4, align 1, !dbg !44
  store i8 %b20.i.i.i.4, ptr %x16.i.i.i.4, align 1, !dbg !48
  store i8 %a19.i.i.i.4, ptr %y18.i.i.i.4, align 1, !dbg !53
  %x16.i.i.i.5 = getelementptr inbounds i8, ptr %a, i64 37, !dbg !55
  %y18.i.i.i.5 = getelementptr inbounds i8, ptr %b, i64 37, !dbg !62
  %a19.i.i.i.5 = load i8, ptr %x16.i.i.i.5, align 1, !dbg !12
  %b20.i.i.i.5 = load i8, ptr %y18.i.i.i.5, align 1, !dbg !44
  store i8 %b20.i.i.i.5, ptr %x16.i.i.i.5, align 1, !dbg !48
  store i8 %a19.i.i.i.5, ptr %y18.i.i.i.5, align 1, !dbg !53
  %x16.i.i.i.6 = getelementptr inbounds i8, ptr %a, i64 38, !dbg !55
  %y18.i.i.i.6 = getelementptr inbounds i8, ptr %b, i64 38, !dbg !62
  %a19.i.i.i.6 = load i8, ptr %x16.i.i.i.6, align 1, !dbg !12
  %b20.i.i.i.6 = load i8, ptr %y18.i.i.i.6, align 1, !dbg !44
  store i8 %b20.i.i.i.6, ptr %x16.i.i.i.6, align 1, !dbg !48
  store i8 %a19.i.i.i.6, ptr %y18.i.i.i.6, align 1, !dbg !53
  %x16.i.i.i.7 = getelementptr inbounds i8, ptr %a, i64 39, !dbg !55
  %y18.i.i.i.7 = getelementptr inbounds i8, ptr %b, i64 39, !dbg !62
  %a19.i.i.i.7 = load i8, ptr %x16.i.i.i.7, align 1, !dbg !12
  %b20.i.i.i.7 = load i8, ptr %y18.i.i.i.7, align 1, !dbg !44
  store i8 %b20.i.i.i.7, ptr %x16.i.i.i.7, align 1, !dbg !48
  store i8 %a19.i.i.i.7, ptr %y18.i.i.i.7, align 1, !dbg !53
  %x16.i.i.i.8 = getelementptr inbounds i8, ptr %a, i64 40, !dbg !55
  %y18.i.i.i.8 = getelementptr inbounds i8, ptr %b, i64 40, !dbg !62
  %a19.i.i.i.8 = load i8, ptr %x16.i.i.i.8, align 1, !dbg !12
  %b20.i.i.i.8 = load i8, ptr %y18.i.i.i.8, align 1, !dbg !44
  store i8 %b20.i.i.i.8, ptr %x16.i.i.i.8, align 1, !dbg !48
  store i8 %a19.i.i.i.8, ptr %y18.i.i.i.8, align 1, !dbg !53
  %x16.i.i.i.9 = getelementptr inbounds i8, ptr %a, i64 41, !dbg !55
  %y18.i.i.i.9 = getelementptr inbounds i8, ptr %b, i64 41, !dbg !62
  %a19.i.i.i.9 = load i8, ptr %x16.i.i.i.9, align 1, !dbg !12
  %b20.i.i.i.9 = load i8, ptr %y18.i.i.i.9, align 1, !dbg !44
  store i8 %b20.i.i.i.9, ptr %x16.i.i.i.9, align 1, !dbg !48
  store i8 %a19.i.i.i.9, ptr %y18.i.i.i.9, align 1, !dbg !53
  %x16.i.i.i.10 = getelementptr inbounds i8, ptr %a, i64 42, !dbg !55
  %y18.i.i.i.10 = getelementptr inbounds i8, ptr %b, i64 42, !dbg !62
  %a19.i.i.i.10 = load i8, ptr %x16.i.i.i.10, align 1, !dbg !12
  %b20.i.i.i.10 = load i8, ptr %y18.i.i.i.10, align 1, !dbg !44
  store i8 %b20.i.i.i.10, ptr %x16.i.i.i.10, align 1, !dbg !48
  store i8 %a19.i.i.i.10, ptr %y18.i.i.i.10, align 1, !dbg !53
  %x16.i.i.i.11 = getelementptr inbounds i8, ptr %a, i64 43, !dbg !55
  %y18.i.i.i.11 = getelementptr inbounds i8, ptr %b, i64 43, !dbg !62
  %a19.i.i.i.11 = load i8, ptr %x16.i.i.i.11, align 1, !dbg !12
  %b20.i.i.i.11 = load i8, ptr %y18.i.i.i.11, align 1, !dbg !44
  store i8 %b20.i.i.i.11, ptr %x16.i.i.i.11, align 1, !dbg !48
  store i8 %a19.i.i.i.11, ptr %y18.i.i.i.11, align 1, !dbg !53
  ret void, !dbg !65

Metadata

Metadata

Assignees

Labels

A-codegen — Area: Code generation
C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such
T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
T-libs — Relevant to the library team, which will review and decide on the PR/issue.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions