Closed
Description
The benchmarks for `rand` and for my crate `small-rngs` have regressed a lot with recent nightlies. A few extra branches are introduced at the end of the benchmark loop.
Relevant assembly with nightly 0077d12 2017-12-14:
│ → callq std::time::Instant::now
│ xor %r8d,%r8d
│ lea 0xb0(%rsp),%rdi
│ lea 0x8(%rsp),%rcx
│ nop
│4b0: mov $0x3e8,%esi
│ data16 nopw %cs:0x0(%rax,%rax,1)
22,85 │4c0: mov %rbp,%rax
8,48 │ mul %r13
│ xor %rbp,%r14
1,01 │ rol $0x37,%rbp
│ mov %rbp,%rbx
23,14 │ xor %r14,%rbx
│ mov %r14,%rbp
1,29 │ shl $0xe,%rbp
2,06 │ xor %rbx,%rbp
20,55 │ rol $0x24,%r14
0,05 │ shld $0x20,%rax,%rdx
1,36 │ mov %rdx,0x8(%rsp)
3,13 │ dec %rsi
│ ↑ jne 4c0
0,03 │ inc %r8
0,02 │ cmp %r12,%r8
│ ↑ jne 4b0
│ mov %r15,%rsi
│ → callq std::time::Instant::elapsed
Relevant assembly with nightly 77efd68 2017-12-15:
│ → callq std::time::Instant::now
│ mov %rax,0x10(%rsp)
│ mov %rdx,0x18(%rsp)
│ xor %eax,%eax
│ movabs $0x2545f4914f6cdd1d,%r9
│ mov %rsp,%r10
│ cmp %r14,%rax
│ ↓ jb 48
│ ↓ jmp 51
│ data16 data16 nopw %cs:0x0(%rax,%rax,1)
│ 40: mov %r8,%rax
│ cmp %r14,%rax
│ ↓ jae 51
0,02 │ 48: mov %rax,%r8
│ add $0x1,%r8
│ ↓ jae 60
│ 51: mov %rax,%r8
│ xor %ecx,%ecx
│ xor %eax,%eax
│ ↓ jmp 6d
│ nop
│ 60: movq $0x1,(%rsp)
│ mov $0x1,%ecx
0,02 │ 6d: mov %rax,(%rsp,%rcx,8)
0,01 │ cmpq $0x1,(%rsp)
│ ↓ jne 10d
│ xor %eax,%eax
│ cmp $0x3e7,%rax
│ ↓ jbe d3
│ ↓ jmp dc
│ nop
0,00 │ 90: mov (%r15),%rdi
9,24 │ mov (%rdi),%rcx
│ mov %rcx,%rax
9,94 │ mul %r9
0,00 │ mov 0x8(%rdi),%rsi
0,84 │ xor %rcx,%rsi
0,01 │ rol $0x37,%rcx
14,55 │ xor %rsi,%rcx
│ mov %rsi,%rbx
1,64 │ shl $0xe,%rbx
6,80 │ xor %rcx,%rbx
6,76 │ mov %rbx,(%rdi)
0,07 │ rol $0x24,%rsi
5,16 │ mov %rsi,0x8(%rdi)
0,22 │ shld $0x20,%rax,%rdx
6,97 │ mov %rdx,(%rsp)
│ mov %r11,%rax
5,33 │ cmp $0x3e7,%rax
│ ↓ ja dc
│ d3: mov %rax,%r11
13,70 │ add $0x1,%r11
│ ↓ jae f0
│ dc: mov %rax,%r11
0,02 │ xor %ecx,%ecx
│ xor %eax,%eax
0,01 │ ↓ jmp fd
│ data16 nopw %cs:0x0(%rax,%rax,1)
│ f0: movq $0x1,(%rsp)
6,77 │ mov $0x1,%ecx
8,61 │ fd: mov %rax,(%rsp,%rcx,8)
0,13 │ cmpq $0x1,(%rsp)
3,19 │ ↑ je 90
│ ↑ jmpq 40
│10d: lea 0x10(%rsp),%rdi
│ → callq std::time::Instant::elapsed