Skip to content

Boolean vectors should use hand-tuned any() and all() #362

Closed
@hsivonen

Description

@hsivonen

Thank you for adding portable SIMD and boolean vectors!

In stdsimd, any() and all() are based on LLVM's cross-ISA bitwise reductions across the vector, so these operations fail to use the type system information that is the reason for their existence: the information that all the bits of each lane are the same. I.e., in b8x16, each lane is either 0 or 0xFF.

This means that looking at all the bits is unnecessary, since for each lane, examining any one bit is sufficient. In the SSE2 case, _mm_movemask_epi8 takes one bit from each lane of a u8x16. It's what the simd crate uses.

I've uploaded a demo crate that exports uninlined versions of any() and all() on u8x16 using both simd and stdsimd. The generated code for the simd crate versions is much simpler. Code generated using stdsimd should be as good.

    .text
    .file   "anyall0-66776763bbd7db1b14e447f414e2cf9a.rs"
    # -----------------------------------------------------------------
    # stdsimd_any_8x16 -- compiler output for any() built on a generic
    # bitwise OR reduction (AT&T syntax, x86-64).
    # In:    %rdi = address of the 16-byte vector (loaded with movdqa,
    #        so it must be 16-byte aligned); presumably the first
    #        SysV argument -- confirm against the Rust signature.
    # Out:   %al  = 1 if any bit of any byte is set, else 0.
    # Clobb: %xmm0, %xmm1, %eax, flags.
    # The sequence folds the vector in half four times
    # (128 -> 64 -> 32 -> 16 -> 8 bits), OR-ing the halves each time.
    # -----------------------------------------------------------------
    .section    .text.stdsimd_any_8x16,"ax",@progbits
    .globl  stdsimd_any_8x16
    .p2align    4, 0x90
    .type   stdsimd_any_8x16,@function
stdsimd_any_8x16:
    .cfi_startproc
    movdqa  (%rdi), %xmm0           # xmm0 = the 16 input bytes
    pshufd  $78, %xmm0, %xmm1       # 78 = 0b01_00_11_10: swap the two 64-bit halves
    por %xmm0, %xmm1                # low 64 bits = low half | high half
    pshufd  $229, %xmm1, %xmm0      # 229 = 0b11_10_01_01: copy dword 1 into dword 0
    por %xmm1, %xmm0                # dword 0 = OR of all four dwords
    movdqa  %xmm0, %xmm1
    psrld   $16, %xmm1              # bring the upper word of each dword down
    por %xmm0, %xmm1                # low word = OR of all eight words
    movdqa  %xmm1, %xmm0
    psrlw   $8, %xmm0               # bring the upper byte of each word down
    por %xmm1, %xmm0                # low byte = OR of all sixteen bytes
    movd    %xmm0, %eax             # only the low byte of eax is meaningful
    testb   %al, %al
    setne   %al                     # al = (reduced byte != 0)
    retq
.Lfunc_end0:
    .size   stdsimd_any_8x16, .Lfunc_end0-stdsimd_any_8x16
    .cfi_endproc

    # -----------------------------------------------------------------
    # stdsimd_all_8x16 -- compiler output for all() built on a generic
    # bitwise AND reduction (AT&T syntax, x86-64).
    # In:    %rdi = address of the 16-byte vector (loaded with movdqa,
    #        so it must be 16-byte aligned); presumably the first
    #        SysV argument -- confirm against the Rust signature.
    # Out:   %al  = 1 if the AND of all sixteen bytes is nonzero, else 0.
    #        NOTE(review): this equals "every lane is true" only when
    #        each lane is 0x00 or 0xFF, as the issue text states for
    #        boolean vectors -- confirm for the actual input type.
    # Clobb: %xmm0, %xmm1, %eax, flags.
    # Same halving scheme as the OR reduction, with pand in place of por.
    # -----------------------------------------------------------------
    .section    .text.stdsimd_all_8x16,"ax",@progbits
    .globl  stdsimd_all_8x16
    .p2align    4, 0x90
    .type   stdsimd_all_8x16,@function
stdsimd_all_8x16:
    .cfi_startproc
    movdqa  (%rdi), %xmm0           # xmm0 = the 16 input bytes
    pshufd  $78, %xmm0, %xmm1       # 78 = 0b01_00_11_10: swap the two 64-bit halves
    pand    %xmm0, %xmm1            # low 64 bits = low half & high half
    pshufd  $229, %xmm1, %xmm0      # 229 = 0b11_10_01_01: copy dword 1 into dword 0
    pand    %xmm1, %xmm0            # dword 0 = AND of all four dwords
    movdqa  %xmm0, %xmm1
    psrld   $16, %xmm1              # bring the upper word of each dword down
    pand    %xmm0, %xmm1            # low word = AND of all eight words
    movdqa  %xmm1, %xmm0
    psrlw   $8, %xmm0               # bring the upper byte of each word down
    pand    %xmm1, %xmm0            # low byte = AND of all sixteen bytes
    movd    %xmm0, %eax             # only the low byte of eax is meaningful
    testb   %al, %al
    setne   %al                     # al = (reduced byte != 0)
    retq
.Lfunc_end1:
    .size   stdsimd_all_8x16, .Lfunc_end1-stdsimd_all_8x16
    .cfi_endproc

    # -----------------------------------------------------------------
    # simd_any_8x16 -- compiler output for any() from the simd crate
    # (AT&T syntax, x86-64).
    # In:    %rdi = address of the 16-byte vector (loaded with movdqa,
    #        so it must be 16-byte aligned); presumably the first
    #        SysV argument -- confirm against the Rust signature.
    # Out:   %al  = 1 if any byte has its most-significant bit set,
    #        else 0.
    # Clobb: %xmm0, %eax, flags.
    # -----------------------------------------------------------------
    .section    .text.simd_any_8x16,"ax",@progbits
    .globl  simd_any_8x16
    .p2align    4, 0x90
    .type   simd_any_8x16,@function
simd_any_8x16:
    .cfi_startproc
    movdqa  (%rdi), %xmm0           # xmm0 = the 16 input bytes
    pmovmskb    %xmm0, %eax         # eax[15:0] = top bit of each byte, upper bits zero
    testl   %eax, %eax
    setne   %al                     # al = (mask != 0): at least one lane set
    retq
.Lfunc_end2:
    .size   simd_any_8x16, .Lfunc_end2-simd_any_8x16
    .cfi_endproc

    # -----------------------------------------------------------------
    # simd_all_8x16 -- compiler output for all() from the simd crate
    # (AT&T syntax, x86-64).
    # In:    %rdi = address of the 16-byte vector (loaded with movdqa,
    #        so it must be 16-byte aligned); presumably the first
    #        SysV argument -- confirm against the Rust signature.
    # Out:   %al  = 1 if every byte has its most-significant bit set,
    #        else 0.
    # Clobb: %xmm0, %eax, flags.
    # -----------------------------------------------------------------
    .section    .text.simd_all_8x16,"ax",@progbits
    .globl  simd_all_8x16
    .p2align    4, 0x90
    .type   simd_all_8x16,@function
simd_all_8x16:
    .cfi_startproc
    movdqa  (%rdi), %xmm0           # xmm0 = the 16 input bytes
    pmovmskb    %xmm0, %eax         # eax[15:0] = top bit of each byte, upper bits zero
    cmpl    $65535, %eax            # 65535 = 0xFFFF: all sixteen mask bits set
    sete    %al                     # al = (mask == 0xFFFF): every lane set
    retq
.Lfunc_end3:
    .size   simd_all_8x16, .Lfunc_end3-simd_all_8x16
    .cfi_endproc


    .section    ".note.GNU-stack","",@progbits

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions