Description
Thank you for adding portable SIMD and boolean vectors!
In `stdsimd`, `any()` and `all()` are based on LLVM's cross-ISA bitwise reductions across the vector, so these operations fail to use the type system information that is the reason for their existence: the information that all the bits of each lane are the same. I.e., in `b8x16`, each lane is either 0 or 0xFF.
This means that looking at all the bits is unnecessary, since for each lane, examining any one bit is sufficient. In the SSE2 case, `_mm_movemask_epi8` takes one bit (the most significant) from each lane of a `u8x16`. It's what the `simd` crate uses.
I've uploaded a demo crate that exports uninlined versions of `any()` and `all()` on `u8x16` using both `simd` and `stdsimd`. The generated code for the `simd` crate versions (see the listing below) is much simpler. Code generated using `stdsimd` should be as good.
# Compiler-emitted listing for a Rust source file (see .file below).
# Syntax: AT&T / GNU as (source operand first, %-prefixed registers,
# $-prefixed immediates). Target: x86-64 with ELF section directives.
.text
.file "anyall0-66776763bbd7db1b14e447f414e2cf9a.rs"
#-----------------------------------------------------------------------
# stdsimd_any_8x16 -- "is any lane set?" computed as a full bitwise OR
# reduction over all 16 bytes of the vector.
# In:    %rdi = address of the 16-byte vector. It is loaded with movdqa,
#        so the address must be 16-byte aligned.
#        (Presumably the first SysV AMD64 argument -- confirm with caller.)
# Out:   %al = 1 if the OR of all 16 bytes is non-zero, otherwise 0.
#        Only %al is written by setne; the upper bits of %eax still hold
#        whatever movd left there.
# Clobb: %xmm0, %xmm1, %eax, flags. Leaf function, no stack use.
#-----------------------------------------------------------------------
.section .text.stdsimd_any_8x16,"ax",@progbits
.globl stdsimd_any_8x16
.p2align 4, 0x90
.type stdsimd_any_8x16,@function
stdsimd_any_8x16:
.cfi_startproc
movdqa (%rdi), %xmm0
# Step 1 (128 -> 64 bits): shuffle immediate 78 = 0b01_00_11_10 selects
# dwords [2,3,0,1], i.e. swaps the two 64-bit halves. OR-ing with the
# original leaves (low half | high half) in the low 64 bits of %xmm1.
pshufd $78, %xmm0, %xmm1
por %xmm0, %xmm1
# Step 2 (64 -> 32 bits): shuffle immediate 229 = 0b11_10_01_01 copies
# dword 1 into dword 0. After the OR, dword 0 of %xmm0 is the OR of all
# four dwords.
pshufd $229, %xmm1, %xmm0
por %xmm1, %xmm0
# Step 3 (32 -> 16 bits): shift every dword right by 16 and OR, so the
# low word of dword 0 is the OR of all eight words.
movdqa %xmm0, %xmm1
psrld $16, %xmm1
por %xmm0, %xmm1
# Step 4 (16 -> 8 bits): shift every word right by 8 and OR, so the
# lowest byte is the OR of all sixteen bytes.
movdqa %xmm1, %xmm0
psrlw $8, %xmm0
por %xmm1, %xmm0
# Move the low dword to %eax and test only its lowest byte, which is
# the fully reduced value.
movd %xmm0, %eax
testb %al, %al
setne %al
retq
.Lfunc_end0:
.size stdsimd_any_8x16, .Lfunc_end0-stdsimd_any_8x16
.cfi_endproc
#-----------------------------------------------------------------------
# stdsimd_all_8x16 -- "are all lanes set?" computed as a full bitwise
# AND reduction over all 16 bytes of the vector.
# In:    %rdi = address of the 16-byte vector. It is loaded with movdqa,
#        so the address must be 16-byte aligned.
#        (Presumably the first SysV AMD64 argument -- confirm with caller.)
# Out:   %al = 1 if the AND of all 16 bytes is non-zero, otherwise 0.
#        Only %al is written by setne; the upper bits of %eax still hold
#        whatever movd left there.
# Note:  "non-zero AND" equals "every lane true" only when each byte is
#        0x00 or 0xFF. For arbitrary bytes it merely means that all 16
#        bytes share at least one set bit position.
# Clobb: %xmm0, %xmm1, %eax, flags. Leaf function, no stack use.
#-----------------------------------------------------------------------
.section .text.stdsimd_all_8x16,"ax",@progbits
.globl stdsimd_all_8x16
.p2align 4, 0x90
.type stdsimd_all_8x16,@function
stdsimd_all_8x16:
.cfi_startproc
movdqa (%rdi), %xmm0
# Step 1 (128 -> 64 bits): shuffle immediate 78 = 0b01_00_11_10 selects
# dwords [2,3,0,1], i.e. swaps the two 64-bit halves. AND-ing with the
# original leaves (low half & high half) in the low 64 bits of %xmm1.
pshufd $78, %xmm0, %xmm1
pand %xmm0, %xmm1
# Step 2 (64 -> 32 bits): shuffle immediate 229 = 0b11_10_01_01 copies
# dword 1 into dword 0. After the AND, dword 0 of %xmm0 is the AND of
# all four dwords.
pshufd $229, %xmm1, %xmm0
pand %xmm1, %xmm0
# Step 3 (32 -> 16 bits): shift every dword right by 16 and AND, so the
# low word of dword 0 is the AND of all eight words.
movdqa %xmm0, %xmm1
psrld $16, %xmm1
pand %xmm0, %xmm1
# Step 4 (16 -> 8 bits): shift every word right by 8 and AND, so the
# lowest byte is the AND of all sixteen bytes.
movdqa %xmm1, %xmm0
psrlw $8, %xmm0
pand %xmm1, %xmm0
# Move the low dword to %eax and test only its lowest byte, which is
# the fully reduced value.
movd %xmm0, %eax
testb %al, %al
setne %al
retq
.Lfunc_end1:
.size stdsimd_all_8x16, .Lfunc_end1-stdsimd_all_8x16
.cfi_endproc
#-----------------------------------------------------------------------
# simd_any_8x16 -- "is any lane set?" using a single byte-mask extract.
# In:    %rdi = address of the 16-byte vector. It is loaded with movdqa,
#        so the address must be 16-byte aligned.
#        (Presumably the first SysV AMD64 argument -- confirm with caller.)
# Out:   %al = 1 if at least one byte has its most significant bit set,
#        otherwise 0. Only %al is written by setne.
# Clobb: %xmm0, %eax, flags. Leaf function, no stack use.
#-----------------------------------------------------------------------
.section .text.simd_any_8x16,"ax",@progbits
.globl simd_any_8x16
.p2align 4, 0x90
.type simd_any_8x16,@function
simd_any_8x16:
.cfi_startproc
movdqa (%rdi), %xmm0
# pmovmskb gathers the top bit of each of the 16 bytes into bits 0..15
# of %eax and clears the remaining bits.
pmovmskb %xmm0, %eax
# Any mask bit set means at least one lane is set.
testl %eax, %eax
setne %al
retq
.Lfunc_end2:
.size simd_any_8x16, .Lfunc_end2-simd_any_8x16
.cfi_endproc
#-----------------------------------------------------------------------
# simd_all_8x16 -- "are all lanes set?" using a single byte-mask extract.
# In:    %rdi = address of the 16-byte vector. It is loaded with movdqa,
#        so the address must be 16-byte aligned.
#        (Presumably the first SysV AMD64 argument -- confirm with caller.)
# Out:   %al = 1 if every one of the 16 bytes has its most significant
#        bit set, otherwise 0. Only %al is written by sete.
# Clobb: %xmm0, %eax, flags. Leaf function, no stack use.
#-----------------------------------------------------------------------
.section .text.simd_all_8x16,"ax",@progbits
.globl simd_all_8x16
.p2align 4, 0x90
.type simd_all_8x16,@function
simd_all_8x16:
.cfi_startproc
movdqa (%rdi), %xmm0
# pmovmskb gathers the top bit of each of the 16 bytes into bits 0..15
# of %eax and clears the remaining bits.
pmovmskb %xmm0, %eax
# 65535 = 0xFFFF: all sixteen mask bits set, i.e. every lane is set.
cmpl $65535, %eax
sete %al
retq
.Lfunc_end3:
.size simd_all_8x16, .Lfunc_end3-simd_all_8x16
.cfi_endproc
# Empty .note.GNU-stack section with no flags: by GNU toolchain
# convention this marks the object as not requiring an executable stack.
.section ".note.GNU-stack","",@progbits