Skip to content

[InstCombine] Missed optimization after #84628 #100293

Open
@cyyself

Description

@cyyself

Statement

After #84628, I found a missed optimization for a C++ code pattern that is widely used in Verilator-generated C++ code, for example when Verilator consumes CIRCT-generated Verilog for RTL circuit simulation.

Without vectorization, the generated code is longer. Worse, when simulating large RTL designs, the branch predictors in current CPUs are usually too small to track this many branches, so they predict poorly. This pattern should again be lowered to vector operations, as it was before the commit, to avoid so many hard-to-predict branches.

Reduced reproducer

TL;DR: https://godbolt.org/z/3qK3jeo1E

Look at the following C++ code:

/* Reduced state record for the reproducer: one output word, a mode switch,
   and two groups of four candidate words that some_func() chooses between.
   Field order matters: the assembly listings in this report address the
   fields by byte offset from the struct pointer (offsets noted below are the
   ones those x86-64 listings use). */
struct a_struct {
    unsigned int value;      /* offset 0: output written by some_func() */
    unsigned int some_cond;  /* offset 4: non-zero -> selector B, zero -> selector A */
    /* selector A */
    unsigned int index;      /* offset 8: which of value_0..value_3 to pick */
    unsigned int value_0;    /* offset 12 */
    unsigned int value_1;    /* offset 16 */
    unsigned int value_2;    /* offset 20 */
    unsigned int value_3;    /* offset 24 */
    /* selector B */
    unsigned int value_0_b;  /* offset 28 */
    unsigned int value_1_b;  /* offset 32 */
    unsigned int value_2_b;  /* offset 36 */
    unsigned int value_3_b;  /* offset 40 */
    bool use_0;              /* offset 44: gates value_0_b */
    bool use_1;              /* offset 45: gates value_1_b */
    bool use_2;              /* offset 46: gates value_2_b */
    bool use_3;              /* offset 47: gates value_3_b */
};

/* Overwrites a.value with the bitwise OR of four "condition ? word : 0" terms.
   Which group of conditions and words is used depends on a.some_cond.
   The expression shape below is the pattern under discussion in this report;
   keep it as written when reproducing. */
void some_func(a_struct &a) {
    if (a.some_cond) {
        /* Selector B: each use_N flag independently gates value_N_b, so any
           subset of the four words (including none) may be OR-ed together. */
        a.value = ( a.use_0 ? a.value_0_b : 0) | 
                  ( a.use_1 ? a.value_1_b : 0) | 
                  ( a.use_2 ? a.value_2_b : 0) | 
                  ( a.use_3 ? a.value_3_b : 0);
    }
    else {
        /* Selector A: a.index is compared against 0..3, so at most one term
           is non-zero; an index outside 0..3 yields 0. */
        a.value = ( 0U == a.index ? a.value_0  : 0) | 
                  ( 1U == a.index ? a.value_1  : 0) | 
                  ( 2U == a.index ? a.value_2  : 0) | 
                  ( 3U == a.index ? a.value_3  : 0);
    }
}

Compile (on x86-64 target): clang++ -O3 -S -c -mllvm --jump-is-expensive test.cpp

Before commit 56b3222, we get assembly like this, in which both selectors are implemented entirely with vector operations:

# some_func(a_struct &a), AT&T syntax. %rdi holds the address of `a`; the
# displacements below are the field offsets of a_struct (value=0, some_cond=4,
# index=8, value_0..3=12..24, value_0_b..3_b=28..40, use_0..3=44..47).
# Only one conditional branch (on some_cond) is emitted; each selector is a
# straight-line mask-and-OR sequence on %xmm0/%xmm1.
_Z9some_funcR8a_struct:                 # @_Z9some_funcR8a_struct
        .cfi_startproc
# %bb.0:
        # a.some_cond == 0 -> index-based selector at .LBB0_3.
        cmpl    $0, 4(%rdi)
        je      .LBB0_3
# %bb.1:
        # Flag-based selector. Load the four bool bytes use_0..use_3 and
        # widen each byte so it fills one 32-bit lane.
        movd    44(%rdi), %xmm0                 # xmm0 = mem[0],zero,zero,zero
        punpcklbw       %xmm0, %xmm0            # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
        punpcklwd       %xmm0, %xmm0            # xmm0 = xmm0[0,0,1,1,2,2,3,3]
        # xmm1 = per-byte mask that is all-ones where the flag byte is zero.
        pxor    %xmm1, %xmm1
        pcmpeqb %xmm0, %xmm1
        # Load value_0_b..value_3_b; pandn computes ~xmm1 & xmm0, keeping only
        # the lanes whose flag is non-zero.
        movdqu  28(%rdi), %xmm0
        pandn   %xmm0, %xmm1
        # Horizontal OR of the four lanes into lane 0.
        pshufd  $238, %xmm1, %xmm0              # xmm0 = xmm1[2,3,2,3]
        por     %xmm1, %xmm0
        pshufd  $85, %xmm0, %xmm1               # xmm1 = xmm0[1,1,1,1]
        por     %xmm0, %xmm1
        # a.value = lane 0.
        movd    %xmm1, %eax
        movl    %eax, (%rdi)
        retq
.LBB0_3:
        # Index-based selector. The first load starts at a.index; only its
        # lane 0 is used, broadcast to all lanes by pshufd $0. The second
        # load fetches value_0..value_3.
        movdqu  8(%rdi), %xmm0
        movdqu  12(%rdi), %xmm1
        pshufd  $0, %xmm0, %xmm0                # xmm0 = xmm0[0,0,0,0]
        # Compare the broadcast index with the constant-pool vector .LCPI0_0
        # (not shown in this listing; presumably {0,1,2,3}), then keep only
        # the matching value lane.
        pcmpeqd .LCPI0_0(%rip), %xmm0
        pand    %xmm1, %xmm0
        # Horizontal OR of the four lanes into lane 0.
        pshufd  $238, %xmm0, %xmm1              # xmm1 = xmm0[2,3,2,3]
        por     %xmm0, %xmm1
        pshufd  $85, %xmm1, %xmm0               # xmm0 = xmm1[1,1,1,1]
        por     %xmm1, %xmm0
        # a.value = lane 0.
        movd    %xmm0, %eax
        movl    %eax, (%rdi)
        retq

However, after commit 56b3222, we get worse, branch-heavy code like this:

# some_func(a_struct &a), AT&T syntax. %rdi holds the address of `a`; the
# displacements below are the field offsets of a_struct (value=0, some_cond=4,
# index=8, value_0..3=12..24, value_0_b..3_b=28..40, use_0..3=44..47).
# Scalar lowering: every "cond ? word : 0" term becomes its own compare and
# conditional branch, and the result is accumulated in %eax with orl.
_Z9some_funcR8a_struct:                 # @_Z9some_funcR8a_struct
        .cfi_startproc
# %bb.0:
        # a.some_cond == 0 -> index-based selector at .LBB0_8.
        cmpl    $0, 4(%rdi)
        je      .LBB0_8
# %bb.1:
        # Flag-based selector. %ecx = 0 is the default third term, %edx = 0
        # the default first term; use_0 set -> .LBB0_2 loads value_0_b.
        xorl    %ecx, %ecx
        cmpb    $0, 44(%rdi)
        movl    $0, %edx
        jne     .LBB0_2
# %bb.3:
        # use_0 clear: %eax = 0 is the default second term; use_1 set ->
        # .LBB0_4 loads value_1_b.
        cmpb    $0, 45(%rdi)
        movl    $0, %eax
        jne     .LBB0_4
.LBB0_5:
        # use_1 clear: merge first term, then test use_2.
        orl     %edx, %eax
        cmpb    $0, 46(%rdi)
        je      .LBB0_7
.LBB0_6:
        # use_2 set: third term = value_2_b.
        movl    36(%rdi), %ecx
.LBB0_7:
        # Merge third term. Load use_3 and set %ecx to 40, the offset of
        # value_3_b, for the shared tail; %esi = 0 is the default fourth term.
        orl     %ecx, %eax
        movzbl  47(%rdi), %edx
        movl    $40, %ecx
        xorl    %esi, %esi
        testb   %dl, %dl
        je      .LBB0_17
.LBB0_16:
        # Shared tail for both selectors: load the fourth word from the byte
        # offset held in %rcx (40 = value_3_b, 24 = value_3).
        movl    (%rdi,%rcx), %esi
.LBB0_17:
        # Merge fourth term and store the result to a.value.
        orl     %esi, %eax
        movl    %eax, (%rdi)
        retq
.LBB0_8:
        # Index-based selector. %ecx = a.index; %edx and %esi start as zero
        # terms. index == 0 -> .LBB0_9 loads value_0.
        movl    8(%rdi), %ecx
        xorl    %edx, %edx
        movl    $0, %esi
        testl   %ecx, %ecx
        je      .LBB0_9
# %bb.10:
        # index != 0: %eax = 0 default; index == 1 -> .LBB0_11 loads value_1.
        movl    $0, %eax
        cmpl    $1, %ecx
        je      .LBB0_11
.LBB0_12:
        # Merge first term, then test index == 2.
        orl     %esi, %eax
        cmpl    $2, %ecx
        jne     .LBB0_14
.LBB0_13:
        # index == 2: third term = value_2.
        movl    20(%rdi), %edx
.LBB0_14:
        # Merge third term. Materialize (index == 3) in %dl and set %ecx to
        # 24, the offset of value_3, then reuse the shared tail above.
        orl     %edx, %eax
        cmpl    $3, %ecx
        sete    %dl
        movl    $24, %ecx
        xorl    %esi, %esi
        testb   %dl, %dl
        jne     .LBB0_16
        jmp     .LBB0_17
.LBB0_2:
        # use_0 set: first term = value_0_b, then test use_1.
        movl    28(%rdi), %edx
        cmpb    $0, 45(%rdi)
        movl    $0, %eax
        je      .LBB0_5
.LBB0_4:
        # use_1 set: second term = value_1_b; merge first term, test use_2.
        movl    32(%rdi), %eax
        orl     %edx, %eax
        cmpb    $0, 46(%rdi)
        jne     .LBB0_6
        jmp     .LBB0_7
.LBB0_9:
        # index == 0: first term = value_0; the index == 1 compare is still
        # performed.
        movl    12(%rdi), %esi
        movl    $0, %eax
        cmpl    $1, %ecx
        jne     .LBB0_12
.LBB0_11:
        # index == 1: second term = value_1; merge first term, test index == 2.
        movl    16(%rdi), %eax
        orl     %esi, %eax
        cmpl    $2, %ecx
        je      .LBB0_13
        jmp     .LBB0_14

Reverting commit 56b3222 on top of the recent main-branch commit a51d263 fixes the problem.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions