Skip to content

Commit 503ce95

Browse files
sun-jacobi authored and agozillon committed
[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. (llvm#80523)
1 parent 0bc3086 commit 503ce95

File tree

3 files changed

+159
-10
lines changed

3 files changed

+159
-10
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13848,11 +13848,13 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
1384813848
return InputRootReplacement;
1384913849
}
1385013850

13851-
// Fold (vwadd.wv y, (vmerge cond, x, 0)) -> vwadd.wv y, x, y, cond
13851+
// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
13852+
// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
1385213853
// y will be the Passthru and cond will be the Mask.
13853-
static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
13854+
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
1385413855
unsigned Opc = N->getOpcode();
13855-
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
13856+
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
13857+
Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
1385613858

1385713859
SDValue Y = N->getOperand(0);
1385813860
SDValue MergeOp = N->getOperand(1);
@@ -13891,16 +13893,17 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
1389113893
N->getFlags());
1389213894
}
1389313895

13894-
static SDValue performVWADDW_VLCombine(SDNode *N,
13895-
TargetLowering::DAGCombinerInfo &DCI,
13896-
const RISCVSubtarget &Subtarget) {
13896+
static SDValue performVWADDSUBW_VLCombine(SDNode *N,
13897+
TargetLowering::DAGCombinerInfo &DCI,
13898+
const RISCVSubtarget &Subtarget) {
1389713899
[[maybe_unused]] unsigned Opc = N->getOpcode();
13898-
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
13900+
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
13901+
Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
1389913902

1390013903
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
1390113904
return V;
1390213905

13903-
return combineVWADDWSelect(N, DCI.DAG);
13906+
return combineVWADDSUBWSelect(N, DCI.DAG);
1390413907
}
1390513908

1390613909
// Helper function for performMemPairCombine.
@@ -15973,10 +15976,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1597315976
return combineToVWMACC(N, DAG, Subtarget);
1597415977
case RISCVISD::VWADD_W_VL:
1597515978
case RISCVISD::VWADDU_W_VL:
15976-
return performVWADDW_VLCombine(N, DCI, Subtarget);
15977-
case RISCVISD::SUB_VL:
1597815979
case RISCVISD::VWSUB_W_VL:
1597915980
case RISCVISD::VWSUBU_W_VL:
15981+
return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
15982+
case RISCVISD::SUB_VL:
1598015983
case RISCVISD::MUL_VL:
1598115984
return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
1598215985
case RISCVISD::VFMADD_VL:
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
4+
5+
define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
6+
; CHECK-LABEL: vwsub_wv_mask_v8i32:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: li a0, 42
9+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10+
; CHECK-NEXT: vmslt.vx v0, v8, a0
11+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
12+
; CHECK-NEXT: vwsub.wv v12, v12, v8, v0.t
13+
; CHECK-NEXT: vmv4r.v v8, v12
14+
; CHECK-NEXT: ret
15+
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
16+
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
17+
%sa = sext <8 x i32> %a to <8 x i64>
18+
%ret = sub <8 x i64> %y, %sa
19+
ret <8 x i64> %ret
20+
}
21+
22+
define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
23+
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
24+
; CHECK: # %bb.0:
25+
; CHECK-NEXT: li a0, 42
26+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
27+
; CHECK-NEXT: vmslt.vx v0, v8, a0
28+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
29+
; CHECK-NEXT: vwsubu.wv v12, v12, v8, v0.t
30+
; CHECK-NEXT: vmv4r.v v8, v12
31+
; CHECK-NEXT: ret
32+
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
33+
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
34+
%sa = zext <8 x i32> %a to <8 x i64>
35+
%ret = sub <8 x i64> %y, %sa
36+
ret <8 x i64> %ret
37+
}
38+
39+
define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
40+
; CHECK-LABEL: vwsubu_vv_mask_v8i32:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: li a0, 42
43+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
44+
; CHECK-NEXT: vmslt.vx v0, v8, a0
45+
; CHECK-NEXT: vmv.v.i v12, 0
46+
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
47+
; CHECK-NEXT: vwsubu.vv v12, v10, v8
48+
; CHECK-NEXT: vmv4r.v v8, v12
49+
; CHECK-NEXT: ret
50+
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
51+
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
52+
%sa = zext <8 x i32> %a to <8 x i64>
53+
%sy = zext <8 x i32> %y to <8 x i64>
54+
%ret = sub <8 x i64> %sy, %sa
55+
ret <8 x i64> %ret
56+
}
57+
58+
define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
59+
; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
60+
; CHECK: # %bb.0:
61+
; CHECK-NEXT: li a0, 42
62+
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
63+
; CHECK-NEXT: vmslt.vx v0, v8, a0
64+
; CHECK-NEXT: vmv.v.i v10, 1
65+
; CHECK-NEXT: vmerge.vvm v16, v10, v8, v0
66+
; CHECK-NEXT: vwsub.wv v8, v12, v16
67+
; CHECK-NEXT: ret
68+
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
69+
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
70+
%sa = sext <8 x i32> %a to <8 x i64>
71+
%ret = sub <8 x i64> %y, %sa
72+
ret <8 x i64> %ret
73+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
4+
5+
define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
6+
; CHECK-LABEL: vwsub_wv_mask_v8i32:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: li a0, 42
9+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
10+
; CHECK-NEXT: vmslt.vx v0, v8, a0
11+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
12+
; CHECK-NEXT: vwsub.wv v16, v16, v8, v0.t
13+
; CHECK-NEXT: vmv8r.v v8, v16
14+
; CHECK-NEXT: ret
15+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
16+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
17+
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
18+
%ret = sub <vscale x 8 x i64> %y, %sa
19+
ret <vscale x 8 x i64> %ret
20+
}
21+
22+
define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
23+
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
24+
; CHECK: # %bb.0:
25+
; CHECK-NEXT: li a0, 42
26+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
27+
; CHECK-NEXT: vmslt.vx v0, v8, a0
28+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
29+
; CHECK-NEXT: vwsubu.wv v16, v16, v8, v0.t
30+
; CHECK-NEXT: vmv8r.v v8, v16
31+
; CHECK-NEXT: ret
32+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
33+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
34+
%sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
35+
%ret = sub <vscale x 8 x i64> %y, %sa
36+
ret <vscale x 8 x i64> %ret
37+
}
38+
39+
define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
40+
; CHECK-LABEL: vwsubu_vv_mask_v8i32:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: li a0, 42
43+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
44+
; CHECK-NEXT: vmslt.vx v0, v8, a0
45+
; CHECK-NEXT: vmv.v.i v16, 0
46+
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
47+
; CHECK-NEXT: vwsubu.vv v16, v12, v8
48+
; CHECK-NEXT: vmv8r.v v8, v16
49+
; CHECK-NEXT: ret
50+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
51+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
52+
%sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
53+
%sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
54+
%ret = sub <vscale x 8 x i64> %sy, %sa
55+
ret <vscale x 8 x i64> %ret
56+
}
57+
58+
define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
59+
; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
60+
; CHECK: # %bb.0:
61+
; CHECK-NEXT: li a0, 42
62+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
63+
; CHECK-NEXT: vmslt.vx v0, v8, a0
64+
; CHECK-NEXT: vmv.v.i v12, 1
65+
; CHECK-NEXT: vmerge.vvm v24, v12, v8, v0
66+
; CHECK-NEXT: vwsub.wv v8, v16, v24
67+
; CHECK-NEXT: ret
68+
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
69+
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
70+
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
71+
%ret = sub <vscale x 8 x i64> %y, %sa
72+
ret <vscale x 8 x i64> %ret
73+
}

0 commit comments

Comments
 (0)