Open
Description
In my code I've essentially stopped using loops with ... (intervals closed on the right, recently written with the syntax ..=) because they give performance problems. This is a simple example that shows the problem:
#![feature(inclusive_range_syntax)]
#![allow(private_no_mangle_fns)]
#[inline(never)]
#[no_mangle]
fn foo1(n: u64) -> u64 {
let mut count = 0;
for _ in 0 .. n {
for j in (0 .. n + 1).rev() {
count += j;
}
}
count
}
#[inline(never)]
#[no_mangle]
fn foo2(n: u64) -> u64 {
let mut count = 0;
for _ in 0 .. n {
for j in (0 ..= n).rev() {
count += j;
}
}
count
}
fn main() {
let n: u64 = std::env::args().nth(1).unwrap().parse().unwrap();
let what: u32 = std::env::args().nth(2).unwrap().parse().unwrap();
match what {
1 => println!("{}", foo1(n)),
2 => println!("{}", foo2(n)),
_ => panic!(),
}
}
Compiled with the last Nightly:
rustc 1.22.0-nightly (d6d711dd8 2017-10-10)
binary: rustc
commit-hash: d6d711dd8f7ad5885294b8e1f0009a23dc1f8b1f
commit-date: 2017-10-10
host: x86_64-pc-windows-gnu
release: 1.22.0-nightly
LLVM version: 4.0
Compiled with:
rustc -O test.rs
Running it calling foo1 takes 0.02 seconds:
...>elaps test 100000 1
500005000000000
Running it calling foo2 takes about 13.65 seconds:
...>elaps test 100000 2
500005000000000
The asm I am seeing using "--emit asm"
foo1:
testq %rcx, %rcx
je .LBB5_1
movq %rcx, %r8
imulq %r8, %r8
leaq -1(%rcx), %rdx
movq %rcx, %rax
mulq %rdx
shldq $63, %rax, %rdx
subq %rdx, %r8
cmpq $3, %rcx
jbe .LBB5_3
movq %rcx, %rdx
andq $-4, %rdx
je .LBB5_3
movd %r8, %xmm0
pshufd $68, %xmm0, %xmm2
leaq -4(%rdx), %r9
movl %r9d, %eax
shrl $2, %eax
incl %eax
andq $3, %rax
je .LBB5_8
cmpq $-1, %rcx
pxor %xmm0, %xmm0
pxor %xmm3, %xmm3
je .LBB5_11
movdqa %xmm2, %xmm3
.LBB5_11:
negq %rax
xorl %r10d, %r10d
pxor %xmm1, %xmm1
.p2align 4, 0x90
.LBB5_12:
paddq %xmm3, %xmm0
paddq %xmm3, %xmm1
addq $4, %r10
incq %rax
jne .LBB5_12
jmp .LBB5_13
.LBB5_3:
xorl %eax, %eax
xorl %edx, %edx
.LBB5_4:
xorl %r9d, %r9d
cmpq $-1, %rcx
cmoveq %r9, %r8
.p2align 4, 0x90
.LBB5_5:
incq %rdx
addq %r8, %rax
cmpq %rcx, %rdx
jb .LBB5_5
.LBB5_19:
retq
.LBB5_1:
xorl %eax, %eax
retq
.LBB5_8:
xorl %r10d, %r10d
pxor %xmm0, %xmm0
pxor %xmm1, %xmm1
.LBB5_13:
cmpq $12, %r9
jb .LBB5_18
cmpq $-1, %rcx
pxor %xmm3, %xmm3
je .LBB5_16
movdqa %xmm2, %xmm3
.LBB5_16:
movq %rdx, %rax
subq %r10, %rax
.p2align 4, 0x90
.LBB5_17:
paddq %xmm3, %xmm0
paddq %xmm3, %xmm1
paddq %xmm3, %xmm0
paddq %xmm3, %xmm1
paddq %xmm3, %xmm0
paddq %xmm3, %xmm1
paddq %xmm3, %xmm0
paddq %xmm3, %xmm1
addq $-16, %rax
jne .LBB5_17
.LBB5_18:
paddq %xmm1, %xmm0
pshufd $78, %xmm0, %xmm1
paddq %xmm0, %xmm1
movd %xmm1, %rax
cmpq %rcx, %rdx
jne .LBB5_4
jmp .LBB5_19
foo2:
pushq %rsi
pushq %rdi
pushq %rbx
testq %rcx, %rcx
je .LBB6_1
testb $1, %cl
jne .LBB6_4
xorl %eax, %eax
xorl %r8d, %r8d
cmpq $1, %rcx
jne .LBB6_11
jmp .LBB6_23
.LBB6_1:
xorl %eax, %eax
jmp .LBB6_23
.LBB6_4:
xorl %r8d, %r8d
movq $-1, %r9
xorl %r10d, %r10d
movq %rcx, %r11
xorl %eax, %eax
jmp .LBB6_5
.p2align 4, 0x90
.LBB6_8:
addq %r11, %rax
movq %rdi, %r10
movq %rdx, %r11
.LBB6_5:
cmpq %r11, %r10
movl $1, %esi
cmovbq %r9, %rsi
cmoveq %r8, %rsi
testq %rsi, %rsi
movl $1, %edi
movl $0, %edx
je .LBB6_8
cmpq $-1, %rsi
jne .LBB6_9
leaq -1(%r11), %rdx
movq %r10, %rdi
jmp .LBB6_8
.LBB6_9:
movl $1, %r8d
cmpq $1, %rcx
je .LBB6_23
.LBB6_11:
xorl %r9d, %r9d
movq $-1, %r10
.p2align 4, 0x90
.LBB6_12:
xorl %r11d, %r11d
movq %rcx, %rdx
jmp .LBB6_13
.p2align 4, 0x90
.LBB6_16:
addq %rdx, %rax
movq %rbx, %r11
movq %rsi, %rdx
.LBB6_13:
cmpq %rdx, %r11
movl $1, %edi
cmovbq %r10, %rdi
cmoveq %r9, %rdi
testq %rdi, %rdi
movl $1, %ebx
movl $0, %esi
je .LBB6_16
cmpq $-1, %rdi
jne .LBB6_17
leaq -1(%rdx), %rsi
movq %r11, %rbx
jmp .LBB6_16
.p2align 4, 0x90
.LBB6_17:
addq $2, %r8
xorl %r11d, %r11d
movq %rcx, %rdx
jmp .LBB6_18
.p2align 4, 0x90
.LBB6_21:
addq %rdx, %rax
movq %rbx, %r11
movq %rsi, %rdx
.LBB6_18:
cmpq %rdx, %r11
movl $1, %edi
cmovbq %r10, %rdi
cmoveq %r9, %rdi
testq %rdi, %rdi
movl $1, %ebx
movl $0, %esi
je .LBB6_21
cmpq $-1, %rdi
jne .LBB6_22
leaq -1(%rdx), %rsi
movq %r11, %rbx
jmp .LBB6_21
.p2align 4, 0x90
.LBB6_22:
cmpq %rcx, %r8
jb .LBB6_12
.LBB6_23:
popq %rbx
popq %rdi
popq %rsi
retq