Open
Description
Found this code-size regression at -Oz:
// Reduced test case for the -Oz regression described in this report.
// NOTE(review): the source is presumably meant to stay exactly as written,
// since the report compares the code the compiler emits for it.
//
// Runs a fixed two-iteration loop that updates x and returns the final x.
// Tracing by hand: x goes 0 -> 2 -> 4, and the "+7" branch is never taken,
// so the function returns 4.
long patatino() {
long x = 0;
for (int i = 0; i < 2; ++i) {
// Advance x by 2 when it is even, by 1 when it is odd.
if (x % 2 == 0) {
x += 2;
} else {
x += 1;
}
// Add 7 when (i, x) are multiples of (13, 17) or of (19, 23) respectively.
if ((i % 13 == 0 && x % 17 == 0) || (i % 19 == 0 && x % 23 == 0)) {
x += 7;
}
}
return x;
}
LLVM-18 emits:
Disassembly of section .text:
0000000000000000 <_Z8patatinov>:
0: 6a 02 push $0x2
2: 41 59 pop %r9
4: 31 c9 xor %ecx,%ecx
6: 6a 11 push $0x11
8: 5e pop %rsi
9: 6a 17 push $0x17
b: 5f pop %rdi
c: 45 89 c8 mov %r9d,%r8d
f: 41 83 e8 01 sub $0x1,%r8d
13: 72 37 jb 4c <_Z8patatinov+0x4c>
15: 48 83 e1 fe and $0xfffffffffffffffe,%rcx
19: 48 83 c1 02 add $0x2,%rcx
1d: 48 89 c8 mov %rcx,%rax
20: 48 99 cqto
22: 48 f7 fe idiv %rsi
25: 41 83 f9 02 cmp $0x2,%r9d
29: 75 05 jne 30 <_Z8patatinov+0x30>
2b: 48 85 d2 test %rdx,%rdx
2e: 74 13 je 43 <_Z8patatinov+0x43>
30: 48 89 c8 mov %rcx,%rax
33: 48 99 cqto
35: 48 f7 ff idiv %rdi
38: 41 83 f9 02 cmp $0x2,%r9d
3c: 75 09 jne 47 <_Z8patatinov+0x47>
3e: 48 85 d2 test %rdx,%rdx
41: 75 04 jne 47 <_Z8patatinov+0x47>
43: 48 83 c1 07 add $0x7,%rcx
47: 45 89 c1 mov %r8d,%r9d
4a: eb c3 jmp f <_Z8patatinov+0xf>
4c: 48 89 c8 mov %rcx,%rax
4f: c3 ret
[[email protected] ~/reducer]$ size /tmp/old
text data bss dec hex filename
144 0 0 144 90 /tmp/old
and trunk emits:
Disassembly of section .text:
0000000000000000 <_Z8patatinov>:
0: 6a 02 push $0x2
2: 41 5a pop %r10
4: 31 c9 xor %ecx,%ecx
6: 6a 11 push $0x11
8: 5f pop %rdi
9: 6a 17 push $0x17
b: 41 58 pop %r8
d: 45 89 d1 mov %r10d,%r9d
10: 41 83 e9 01 sub $0x1,%r9d
14: 72 3d jb 53 <_Z8patatinov+0x53>
16: 48 83 e1 fe and $0xfffffffffffffffe,%rcx
1a: 48 8d 71 02 lea 0x2(%rcx),%rsi
1e: 48 89 f0 mov %rsi,%rax
21: 48 99 cqto
23: 48 f7 ff idiv %rdi
26: 41 83 fa 02 cmp $0x2,%r10d
2a: 75 05 jne 31 <_Z8patatinov+0x31>
2c: 48 85 d2 test %rdx,%rdx
2f: 74 13 je 44 <_Z8patatinov+0x44>
31: 48 89 f0 mov %rsi,%rax
34: 48 99 cqto
36: 49 f7 f8 idiv %r8
39: 41 83 fa 02 cmp $0x2,%r10d
3d: 75 0c jne 4b <_Z8patatinov+0x4b>
3f: 48 85 d2 test %rdx,%rdx
42: 75 07 jne 4b <_Z8patatinov+0x4b>
44: 48 83 c1 09 add $0x9,%rcx
48: 48 89 ce mov %rcx,%rsi
4b: 45 89 ca mov %r9d,%r10d
4e: 48 89 f1 mov %rsi,%rcx
51: eb bd jmp 10 <_Z8patatinov+0x10>
53: 48 89 c8 mov %rcx,%rax
56: c3 ret
[[email protected] ~/tools/bin]$ size /tmp/new
text data bss dec hex filename
151 0 0 151 97 /tmp/new
This needs to be bisected, but there are two visible problems:
1. `pop %r8` is 2 bytes instead of the 1 byte for `pop %rdi` -- maybe a different regalloc decision.
2. trunk has to do more moves at the end of the loop body:
48: 48 89 ce mov %rcx,%rsi
4b: 45 89 ca mov %r9d,%r10d
4e: 48 89 f1 mov %rsi,%rcx
vs. LLVM-18:
47: 45 89 c1 mov %r8d,%r9d
cc: @RKSimon for x86