Skip to content

[regression] trunk emits more mov than llvm-18 at -Oz #111081

Open
@dcci

Description

@dcci

Found this regression at -Oz

// Reduced reproducer for a code-size regression at -Oz.
// NOTE(review): the report depends on this exact source shape; do not
// simplify or restyle it, or the generated code may no longer match.
//
// Takes no arguments and returns the final value of x. Tracing the two
// iterations by hand gives x = 2, then x = 4; the "+7" branch is never
// taken, so the function returns 4.
long patatino() {
    long x = 0;
    // Only two iterations: i takes the values 0 and 1.
    for (int i = 0; i < 2; ++i) {
            // Add 2 when x is even, 1 when x is odd.
            if (x % 2 == 0) {
                x += 2;
            } else {
                x += 1;
            }
            // Within this loop range, i % 13 == 0 and i % 19 == 0 hold only
            // for i == 0, so the x % 17 and x % 23 checks matter only on the
            // first iteration.
            if ((i % 13 == 0 && x % 17 == 0) || (i % 19 == 0 && x % 23 == 0)) {
                x += 7;
            }
    }
    return x;
}

LLVM-18 emits:

Disassembly of section .text:

0000000000000000 <_Z8patatinov>:
   0:   6a 02                   push   $0x2
   2:   41 59                   pop    %r9
   4:   31 c9                   xor    %ecx,%ecx
   6:   6a 11                   push   $0x11
   8:   5e                      pop    %rsi
   9:   6a 17                   push   $0x17
   b:   5f                      pop    %rdi
   c:   45 89 c8                mov    %r9d,%r8d
   f:   41 83 e8 01             sub    $0x1,%r8d
  13:   72 37                   jb     4c <_Z8patatinov+0x4c>
  15:   48 83 e1 fe             and    $0xfffffffffffffffe,%rcx
  19:   48 83 c1 02             add    $0x2,%rcx
  1d:   48 89 c8                mov    %rcx,%rax
  20:   48 99                   cqto   
  22:   48 f7 fe                idiv   %rsi
  25:   41 83 f9 02             cmp    $0x2,%r9d
  29:   75 05                   jne    30 <_Z8patatinov+0x30>
  2b:   48 85 d2                test   %rdx,%rdx
  2e:   74 13                   je     43 <_Z8patatinov+0x43>
  30:   48 89 c8                mov    %rcx,%rax
  33:   48 99                   cqto   
  35:   48 f7 ff                idiv   %rdi
  38:   41 83 f9 02             cmp    $0x2,%r9d
  3c:   75 09                   jne    47 <_Z8patatinov+0x47>
  3e:   48 85 d2                test   %rdx,%rdx
  41:   75 04                   jne    47 <_Z8patatinov+0x47>
  43:   48 83 c1 07             add    $0x7,%rcx
  47:   45 89 c1                mov    %r8d,%r9d
  4a:   eb c3                   jmp    f <_Z8patatinov+0xf>
  4c:   48 89 c8                mov    %rcx,%rax
  4f:   c3                      ret    
[[email protected] ~/reducer]$ size /tmp/old 
   text    data     bss     dec     hex filename
    144       0       0     144      90 /tmp/old

and trunk emits:

Disassembly of section .text:

0000000000000000 <_Z8patatinov>:
   0:   6a 02                   push   $0x2
   2:   41 5a                   pop    %r10
   4:   31 c9                   xor    %ecx,%ecx
   6:   6a 11                   push   $0x11
   8:   5f                      pop    %rdi
   9:   6a 17                   push   $0x17
   b:   41 58                   pop    %r8
   d:   45 89 d1                mov    %r10d,%r9d
  10:   41 83 e9 01             sub    $0x1,%r9d
  14:   72 3d                   jb     53 <_Z8patatinov+0x53>
  16:   48 83 e1 fe             and    $0xfffffffffffffffe,%rcx
  1a:   48 8d 71 02             lea    0x2(%rcx),%rsi
  1e:   48 89 f0                mov    %rsi,%rax
  21:   48 99                   cqto   
  23:   48 f7 ff                idiv   %rdi
  26:   41 83 fa 02             cmp    $0x2,%r10d
  2a:   75 05                   jne    31 <_Z8patatinov+0x31>
  2c:   48 85 d2                test   %rdx,%rdx
  2f:   74 13                   je     44 <_Z8patatinov+0x44>
  31:   48 89 f0                mov    %rsi,%rax
  34:   48 99                   cqto   
  36:   49 f7 f8                idiv   %r8
  39:   41 83 fa 02             cmp    $0x2,%r10d
  3d:   75 0c                   jne    4b <_Z8patatinov+0x4b>
  3f:   48 85 d2                test   %rdx,%rdx
  42:   75 07                   jne    4b <_Z8patatinov+0x4b>
  44:   48 83 c1 09             add    $0x9,%rcx
  48:   48 89 ce                mov    %rcx,%rsi
  4b:   45 89 ca                mov    %r9d,%r10d
  4e:   48 89 f1                mov    %rsi,%rcx
  51:   eb bd                   jmp    10 <_Z8patatinov+0x10>
  53:   48 89 c8                mov    %rcx,%rax
  56:   c3                      ret    
[[email protected] ~/tools/bin]$ size /tmp/new 
   text    data     bss     dec     hex filename
    151       0       0     151      97 /tmp/new

This still needs to be bisected, but two problems are visible in the trunk output:

`pop %r8` (41 58) is 2 bytes, versus 1 byte for the `pop %rdi` (5f) that LLVM-18 emits -- maybe a different regalloc decision.

trunk has to do more moves:

  48:   48 89 ce                mov    %rcx,%rsi
  4b:   45 89 ca                mov    %r9d,%r10d
  4e:   48 89 f1                mov    %rsi,%rcx

vs

  47:   45 89 c1                mov    %r8d,%r9d

cc: @RKSimon for x86

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions