Skip to content

Commit dc5dca1

Browse files
authored
[RISCV][Isel] Remove redundant vmerge for the scalable vwadd(u).wv (#80079)
Similar to #78403, but for the scalable `vwadd(u).wv`, now that #76785 has been recommitted.

### Code

```
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
    %ret = add <vscale x 8 x i64> %sa, %y
    ret <vscale x 8 x i64> %ret
}
```

### Before this patch

[Compiler Explorer](https://godbolt.org/z/xsoa5xPrd)

```
vwadd_wv_mask_v8i32:
    li a0, 42
    vsetvli a1, zero, e32, m4, ta, ma
    vmslt.vx v0, v8, a0
    vmv.v.i v12, 0
    vmerge.vvm v24, v12, v8, v0
    vwadd.wv v8, v16, v24
    ret
```

### After this patch

```
vwadd_wv_mask_v8i32:
    li a0, 42
    vsetvli a1, zero, e32, m4, ta, ma
    vmslt.vx v0, v8, a0
    vsetvli zero, zero, e32, m4, tu, mu
    vwadd.wv v16, v16, v8, v0.t
    vmv8r.v v8, v16
    ret
```
1 parent d71831a commit dc5dca1

File tree

3 files changed

+109
-13
lines changed

3 files changed

+109
-13
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13776,8 +13776,11 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
1377613776

1377713777
SDValue Y = N->getOperand(0);
1377813778
SDValue MergeOp = N->getOperand(1);
13779-
if (MergeOp.getOpcode() != RISCVISD::VMERGE_VL)
13779+
unsigned MergeOpc = MergeOp.getOpcode();
13780+
13781+
if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
1378013782
return SDValue();
13783+
1378113784
SDValue X = MergeOp->getOperand(1);
1378213785

1378313786
if (!MergeOp.hasOneUse())
@@ -13795,11 +13798,12 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
1379513798

1379613799
// False value of MergeOp should be all zeros
1379713800
SDValue Z = MergeOp->getOperand(2);
13798-
if (Z.getOpcode() != ISD::INSERT_SUBVECTOR)
13799-
return SDValue();
13800-
if (!ISD::isBuildVectorAllZeros(Z.getOperand(1).getNode()))
13801-
return SDValue();
13802-
if (!isNullOrNullSplat(Z.getOperand(0)) && !Z.getOperand(0).isUndef())
13801+
13802+
if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
13803+
(isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
13804+
Z = Z.getOperand(1);
13805+
13806+
if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
1380313807
return SDValue();
1380413808

1380513809
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),

llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,15 +30,17 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1>
3030
;
3131
; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1:
3232
; ZVBB: # %bb.0:
33-
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma
33+
; ZVBB-NEXT: vmv1r.v v9, v0
34+
; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, mu
3435
; ZVBB-NEXT: vmv.v.i v10, 0
35-
; ZVBB-NEXT: vmerge.vim v12, v10, 1, v0
3636
; ZVBB-NEXT: vmv1r.v v0, v8
37-
; ZVBB-NEXT: vmerge.vim v8, v10, 1, v0
38-
; ZVBB-NEXT: vwsll.vi v16, v8, 8
39-
; ZVBB-NEXT: vwaddu.wv v16, v16, v12
40-
; ZVBB-NEXT: vmsne.vi v8, v18, 0
41-
; ZVBB-NEXT: vmsne.vi v0, v16, 0
37+
; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0
38+
; ZVBB-NEXT: vwsll.vi v12, v10, 8
39+
; ZVBB-NEXT: li a0, 1
40+
; ZVBB-NEXT: vmv1r.v v0, v9
41+
; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t
42+
; ZVBB-NEXT: vmsne.vi v8, v14, 0
43+
; ZVBB-NEXT: vmsne.vi v0, v12, 0
4244
; ZVBB-NEXT: csrr a0, vlenb
4345
; ZVBB-NEXT: srli a0, a0, 2
4446
; ZVBB-NEXT: add a1, a0, a0
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
4+
5+
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
6+
; CHECK-LABEL: vwadd_wv_mask_v8i32:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: li a0, 42
9+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
10+
; CHECK-NEXT: vmslt.vx v0, v8, a0
11+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
12+
; CHECK-NEXT: vwadd.wv v16, v16, v8, v0.t
13+
; CHECK-NEXT: vmv8r.v v8, v16
14+
; CHECK-NEXT: ret
15+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
16+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
17+
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
18+
%ret = add <vscale x 8 x i64> %sa, %y
19+
ret <vscale x 8 x i64> %ret
20+
}
21+
22+
define <vscale x 8 x i64> @vwaddu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
23+
; CHECK-LABEL: vwaddu_wv_mask_v8i32:
24+
; CHECK: # %bb.0:
25+
; CHECK-NEXT: li a0, 42
26+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
27+
; CHECK-NEXT: vmslt.vx v0, v8, a0
28+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
29+
; CHECK-NEXT: vwaddu.wv v16, v16, v8, v0.t
30+
; CHECK-NEXT: vmv8r.v v8, v16
31+
; CHECK-NEXT: ret
32+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
33+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
34+
%sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
35+
%ret = add <vscale x 8 x i64> %sa, %y
36+
ret <vscale x 8 x i64> %ret
37+
}
38+
39+
define <vscale x 8 x i64> @vwaddu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
40+
; CHECK-LABEL: vwaddu_vv_mask_v8i32:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: li a0, 42
43+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
44+
; CHECK-NEXT: vmslt.vx v0, v8, a0
45+
; CHECK-NEXT: vmv.v.i v16, 0
46+
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
47+
; CHECK-NEXT: vwaddu.vv v16, v8, v12
48+
; CHECK-NEXT: vmv8r.v v8, v16
49+
; CHECK-NEXT: ret
50+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
51+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
52+
%sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
53+
%sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
54+
%ret = add <vscale x 8 x i64> %sa, %sy
55+
ret <vscale x 8 x i64> %ret
56+
}
57+
58+
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32_commutative(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
59+
; CHECK-LABEL: vwadd_wv_mask_v8i32_commutative:
60+
; CHECK: # %bb.0:
61+
; CHECK-NEXT: li a0, 42
62+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
63+
; CHECK-NEXT: vmslt.vx v0, v8, a0
64+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
65+
; CHECK-NEXT: vwadd.wv v16, v16, v8, v0.t
66+
; CHECK-NEXT: vmv8r.v v8, v16
67+
; CHECK-NEXT: ret
68+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
69+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
70+
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
71+
%ret = add <vscale x 8 x i64> %y, %sa
72+
ret <vscale x 8 x i64> %ret
73+
}
74+
75+
define <vscale x 8 x i64> @vwadd_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
76+
; CHECK-LABEL: vwadd_wv_mask_v8i32_nonzero:
77+
; CHECK: # %bb.0:
78+
; CHECK-NEXT: li a0, 42
79+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
80+
; CHECK-NEXT: vmslt.vx v0, v8, a0
81+
; CHECK-NEXT: vmv.v.i v12, 1
82+
; CHECK-NEXT: vmerge.vvm v24, v12, v8, v0
83+
; CHECK-NEXT: vwadd.wv v8, v16, v24
84+
; CHECK-NEXT: ret
85+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
86+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
87+
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
88+
%ret = add <vscale x 8 x i64> %sa, %y
89+
ret <vscale x 8 x i64> %ret
90+
}

0 commit comments

Comments
 (0)