Skip to content

The new step_by is much too slow #43064

Closed
@leonardo-m

Description

@leonardo-m

As I explained in #42534, the new step_by can't be used because the code it generates is too slow:

#![feature(iterator_step_by)]

/// Baseline for the comparison: sums `a`, `a + 3`, `a + 6`, ... for as long as
/// the running value stays strictly below `b`, using a hand-written `while` loop.
///
/// Returns 0 if `a >= b` on entry, because the loop body never executes.
// Kept out of line so the function shows up as its own symbol in the emitted assembly.
#[inline(never)]
pub fn first_foo(mut a: i32, b: i32) -> i32 {
    let mut tot = 0;

    // Manual stride of 3: `a` doubles as the loop counter and the value being summed.
    while a < b {
        tot += a;
        a += 3;
    }
    tot
}

/// Iterator-based counterpart of `first_foo`: sums every third value of the
/// range `x`, starting at `x.start`, by iterating over `x.step_by(3)`.
///
/// This is the variant whose generated code the issue reports as slow.
// Kept out of line so the function shows up as its own symbol in the emitted assembly.
#[inline(never)]
pub fn second_foo(x: std::ops::Range<i32>) -> i32 {
    let mut tot = 0;

    // NOTE(review): presumably this `allow` is a leftover from (or needed for) the
    // older, deprecated `step_by` on ranges -- confirm whether it is still required.
    #[allow(deprecated)]
    for a in x.step_by(3) {
        tot += a;
    }
    tot
}

/// Runs both variants over the same two ranges (0..1000 and 0..100000) and
/// prints each sum on its own line, so the results can be compared.
fn main() {
    // Hand-written `while` loop version.
    println!("{}", first_foo(0, 1000));
    println!("{}", first_foo(0, 100000));
    // `step_by` version over the equivalent ranges.
    println!("{}", second_foo(0 .. 1000));
    println!("{}", second_foo(0 .. 100000));
}

Compiling with:
rustc -O --emit asm test.rs

This produces the following assembly:

_ZN4test9first_foo17he3a0bddb42457919E:
    testl   %ecx, %ecx
    jle .LBB0_1
    xorl    %edx, %edx
    xorl    %eax, %eax
    .p2align    4, 0x90
.LBB0_4:
    addl    %edx, %eax
    addl    $3, %edx
    cmpl    %ecx, %edx
    jl  .LBB0_4
    jmp .LBB0_2
.LBB0_1:
    xorl    %eax, %eax
.LBB0_2:
    retq

_ZN4test10second_foo17hd5d21559ba84e6d1E:
    movq    %rcx, %r9
    shrq    $32, %r9
    xorl    %r8d, %r8d
    xorl    %r10d, %r10d
    xorl    %eax, %eax
    testb   $1, %r10b
    je  .LBB1_11
    jmp .LBB1_2
    .p2align    4, 0x90
.LBB1_7:
    shrq    $32, %rdx
    addl    %eax, %edx
    movb    $1, %r10b
    movl    %edx, %eax
    testb   $1, %r10b
    je  .LBB1_11
.LBB1_2:
    cmpl    %r9d, %ecx
    jge .LBB1_5
    leal    1(%rcx), %edx
    cmpl    %r9d, %edx
    jge .LBB1_4
    leal    2(%rcx), %edx
    cmpl    %r9d, %edx
    jge .LBB1_9
    addl    $3, %ecx
    shlq    $32, %rdx
    movl    $1, %r10d
    jmp .LBB1_6
    .p2align    4, 0x90
.LBB1_11:
    movq    %rcx, %r10
    shlq    $32, %r10
    xorl    %edx, %edx
    cmpl    %r9d, %ecx
    setl    %dl
    cmovgeq %r8, %r10
    addl    %edx, %ecx
    jmp .LBB1_6
    .p2align    4, 0x90
.LBB1_4:
    movl    %edx, %ecx
    jmp .LBB1_5
.LBB1_9:
    movl    %edx, %ecx
    .p2align    4, 0x90
.LBB1_5:
    xorl    %edx, %edx
    xorl    %r10d, %r10d
.LBB1_6:
    orq %r10, %rdx
    testl   %edx, %edx
    jne .LBB1_7
    retq

For a function like second_foo(), the old step_by generated something like:

_ZN5test210second_foo17h5b0eb3168418bdd1E:
    movq    %rcx, %rdx
    shrq    $32, %rdx
    xorl    %eax, %eax
    cmpl    %edx, %ecx
    jge .LBB1_3
    xorl    %eax, %eax
    .p2align    4, 0x90
.LBB1_2:
    addl    %ecx, %eax
    addl    $3, %ecx
    cmovol  %edx, %ecx ; what is this for?
    cmpl    %edx, %ecx
    jl  .LBB1_2
.LBB1_3:
    retq

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-enhancement — Category: An issue proposing an enhancement or a PR with one.
    I-slow — Issue: Problems and improvements with respect to performance of generated code.
    T-libs-api — Relevant to the library API team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions