[RISCV] Unprofitable select vectorization/lowering

This was brought up in discussion on https://github.com/llvm/llvm-project/pull/108419.  This is the root cause of the reported regression on leela from spec2017 in the LTO configuration.

We are failing to recognize shifts disguised as selects in at least two contexts:
1) During vector lowering, as shown in test_vec4.  In this case, the vector select is a disguised vector shift of the mask vector extended to the working type.  Note that the shift amount differs from lane to lane (19, 18, 17, 16), i.e. the shift is by a non-splat constant vector rather than by a single uniform amount.
2) During SLP vectorization, as shown in test_scalarized.  If test_scalarized is passed to the SLP vectorizer, the vectorizer produces the form in test_vec4.

```
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=riscv64 -mattr=+v,+zba,+zbb < %s | FileCheck %s

; test_vec4: vector form of the pattern.  The four i16 arguments are inserted
; into the lanes of a <4 x i16>, each lane is compared for equality with 1,
; and the resulting mask selects a per-lane power of two or zero; the four
; i32 lanes are then OR-reduced into the returned i32.
; The assertions below record the current output: the scalars are packed with
; slli/or into one register, compared with vmseq.vi, and the selected
; constants come from a masked vle32.v of .LCPI0_0 into a zeroed register
; before the vredor.vs.
define i32 @test_vec4(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: test_vec4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    slli a2, a2, 32
; CHECK-NEXT:    slli a3, a3, 48
; CHECK-NEXT:    or a2, a3, a2
; CHECK-NEXT:    slli a1, a1, 16
; CHECK-NEXT:    or a0, a0, a1
; CHECK-NEXT:    or a0, a0, a2
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v8, a0
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmseq.vi v0, v8, 1
; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT:    vmv.v.i v8, 0
; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vredor.vs v8, v8, v8
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  ; Build the <4 x i16> input vector: lane 0 = %a, 1 = %b, 2 = %c, 3 = %d.
  %t35 = insertelement <4 x i16> poison, i16 %a, i64 0
  %t36 = insertelement <4 x i16> %t35, i16 %b, i64 1
  %t37 = insertelement <4 x i16> %t36, i16 %c, i64 2
  %t38 = insertelement <4 x i16> %t37, i16 %d, i64 3
  ; Per-lane mask: true where the lane equals 1.
  %t39 = icmp eq <4 x i16> %t38, <i16 1, i16 1, i16 1, i16 1>
  ; The true-side constants are 1<<19, 1<<18, 1<<17 and 1<<16, and the false
  ; side is zero, so this select equals the zero-extended mask shifted left by
  ; the per-lane amounts <19, 18, 17, 16>.
  %t40 = select <4 x i1> %t39, <4 x i32> <i32 524288, i32 262144, i32 131072, i32 65536>, <4 x i32> zeroinitializer
  ; OR all four lanes together to form the scalar result.
  %t41 = tail call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %t40)
  ret i32 %t41
}

; test_scalarized: scalar form of the same computation as test_vec4.  Each
; i16 argument is compared with 1, the comparison selects a distinct power of
; two or zero, and the four i32 results are combined with a tree of `or`.
; The assertions below record the current scalar output: one addi/seqz pair
; per argument, one slli per argument, then three `or` instructions.
define i32 @test_scalarized(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
; CHECK-LABEL: test_scalarized:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, -1
; CHECK-NEXT:    seqz a0, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    seqz a1, a1
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    seqz a2, a2
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    seqz a3, a3
; CHECK-NEXT:    slli a0, a0, 19
; CHECK-NEXT:    slli a1, a1, 18
; CHECK-NEXT:    slli a2, a2, 17
; CHECK-NEXT:    slli a3, a3, 16
; CHECK-NEXT:    or a0, a0, a1
; CHECK-NEXT:    or a2, a2, a3
; CHECK-NEXT:    or a0, a0, a2
; CHECK-NEXT:    ret
  ; One equality test against 1 per argument.
  %t39.i0 = icmp eq i16 %a, 1
  %t39.i1 = icmp eq i16 %b, 1
  %t39.i2 = icmp eq i16 %c, 1
  %t39.i3 = icmp eq i16 %d, 1
  ; Each select yields 1<<19, 1<<18, 1<<17 or 1<<16 when its test is true and
  ; zero otherwise, i.e. the zero-extended i1 shifted left by 19/18/17/16.
  %t40.i0 = select i1 %t39.i0, i32 524288, i32 0
  %t40.i1 = select i1 %t39.i1, i32 262144, i32 0
  %t40.i2 = select i1 %t39.i2, i32 131072, i32 0
  %t40.i3 = select i1 %t39.i3, i32 65536, i32 0
  ; Pairwise OR tree combining the four results into the return value.
  %or.rdx0 = or i32 %t40.i0, %t40.i1
  %or.rdx1 = or i32 %t40.i2, %t40.i3
  %or.rdx2 = or i32 %or.rdx0, %or.rdx1
  ret i32 %or.rdx2
}
```
To reproduce the SLP vectorization described in item 2, run: `./opt -S example.ll -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v`


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[RISCV] Unprofitable select vectorization/lowering #109466

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[RISCV] Unprofitable select vectorization/lowering #109466

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions