Closed
Description
I was looking at the generated asm for
/// Returns `true` if `s` starts with any of `"foo"`, `"bar"` or `"baz"`.
///
/// Variant under comparison: the check is written with `Iterator::any`
/// over the array's `iter()`.
pub fn foo(s: &str) -> bool {
["foo", "bar", "baz"].iter().any(|&x| s.starts_with(x))
}
to see what happens to the array, and was surprised to see a large function with 5(!) call sites for `memcmp`. The generated code doesn't seem to vary with the number of elements in the array.
That got me to compare with the functionally equivalent code:
/// Returns `true` if `s` starts with any of `"foo"`, `"bar"` or `"baz"`.
///
/// Variant under comparison: the same check written as an explicit `for`
/// loop with an early return.
pub fn foo(s: &str) -> bool {
// `iter()` yields references to the array's elements, so `x` is a `&&str`.
for x in ["foo", "bar", "baz"].iter() {
if s.starts_with(x) { return true; }
}
return false;
}
which doesn't yield the same assembly at all. The first difference is that the latter doesn't even store the array (only the strs), while the former does.
That led me to compare `Iterator::any` with equivalent `for` loops even further, down to the extreme (and stupid) case:
/// Returns `true` if any element of the one-element array `[1]` equals 1.
///
/// Minimal `Iterator::any` variant of the comparison.
pub fn foo() -> bool {
[1].iter().any(|&x| x == 1)
}
vs.
/// Returns `true` if any element of the one-element array `[1]` equals 1.
///
/// Hand-written `for`-loop counterpart of `[1].iter().any(|&x| x == 1)`.
pub fn foo() -> bool {
    // `.iter()` is used instead of `.into_iter()`: on an array, `.into_iter()`
    // only produced references on older editions (method resolution fell back
    // to the slice impl). Since the 2021 edition it yields elements by value,
    // so the `&x` pattern would no longer compile. `.iter()` yields `&i32` on
    // every edition and is the same iterator the `any` variant uses.
    for &x in [1].iter() {
        if x == 1 {
            return true;
        }
    }
    false
}
The latter generates the simplest code possible:
# Output for the `for`-loop version: the body is a constant return.
_ZN10playground3foo17h1a4864584facf3ddE:
.cfi_startproc
# %al = 1, i.e. the function returns 1 unconditionally.
movb $1, %al
retq
The former not so much:
# Output for the `Iterator::any` version: a real loop over the array data
# in `ref.5`, using two bytes of stack as scratch flags.
_ZN10playground3foo17h488d39c1724807f0E:
.cfi_startproc
# Reserve two scratch bytes on the stack: (%rsp) and 1(%rsp).
subq $2, %rsp
.Lcfi0:
.cfi_def_cfa_offset 10
# %rax = byte offset into the array, starting at 0.
xorl %eax, %eax
# %rcx = address of the array data (ref.5).
leaq ref.5(%rip), %rcx
# %rdx = address of scratch byte 0, %rsi = address of scratch byte 1.
movq %rsp, %rdx
leaq 1(%rsp), %rsi
.p2align 4, 0x90
.LBB0_1:
# Offset 4 means the single 4-byte element has been consumed: exit via .LBB0_2.
cmpq $4, %rax
je .LBB0_2
# Compare the current 32-bit element with 1.
cmpl $1, (%rax,%rcx)
# Default store target is scratch byte 0 (used when the element != 1).
movq %rdx, %rdi
jne .LBB0_5
# Element == 1: set scratch byte 0 to 1 and retarget the store to scratch byte 1.
movb $1, (%rsp)
movq %rsi, %rdi
.LBB0_5:
# Write 0 to whichever scratch byte %rdi selected above.
movb $0, (%rdi)
# Advance to the next 4-byte element.
addq $4, %rax
# Keep looping while scratch byte 0 is still 0.
cmpb $0, (%rsp)
je .LBB0_1
# %al = 1 if bit 0 of scratch byte 1 is clear, else 0.
testb $1, 1(%rsp)
sete %al
jmp .LBB0_7
.LBB0_2:
# Array exhausted: return 0.
xorl %eax, %eax
.LBB0_7:
# Release the scratch bytes and return %al.
addq $2, %rsp
retq
.Lfunc_end0:
.size _ZN10playground3foo17h488d39c1724807f0E, .Lfunc_end0-_ZN10playground3foo17h488d39c1724807f0E
.cfi_endproc
# Read-only data: the array contents, a single 32-bit value 1.
.type ref.5,@object
.section .rodata.cst4,"aM",@progbits,4
.p2align 2
ref.5:
.long 1
.size ref.5, 4