-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv. #80523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Chia (sun-jacobi) Changes: Extend #78403 and #80079 to support vwsub.wv and vwsubu.wv. Code
Before this patch
After this patch
Full diff: https://github.com/llvm/llvm-project/pull/80523.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b5db41197a35a..32ef41e270f63 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13761,11 +13761,13 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
return InputRootReplacement;
}
-// Fold (vwadd.wv y, (vmerge cond, x, 0)) -> vwadd.wv y, x, y, cond
+// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
+// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
// y will be the Passthru and cond will be the Mask.
-static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
- assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+ assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+ Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
SDValue Y = N->getOperand(0);
SDValue MergeOp = N->getOperand(1);
@@ -13804,16 +13806,17 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
N->getFlags());
}
-static SDValue performVWADDW_VLCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const RISCVSubtarget &Subtarget) {
+static SDValue performVWADDSUBW_VLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
[[maybe_unused]] unsigned Opc = N->getOpcode();
- assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+ assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+ Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
- return combineVWADDWSelect(N, DCI.DAG);
+ return combineVWADDSUBWSelect(N, DCI.DAG);
}
// Helper function for performMemPairCombine.
@@ -15886,10 +15889,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return combineToVWMACC(N, DAG, Subtarget);
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
- return performVWADDW_VLCombine(N, DCI, Subtarget);
- case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
+ return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
+ case RISCVISD::SUB_VL:
case RISCVISD::MUL_VL:
return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
case RISCVISD::VFMADD_VL:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
new file mode 100644
index 0000000000000..382f00913cb41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwsub.wv v12, v12, v8, v0.t
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+ %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ %sa = sext <8 x i32> %a to <8 x i64>
+ %ret = sub <8 x i64> %y, %sa
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT: vwsubu.wv v12, v12, v8, v0.t
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+ %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ %sa = zext <8 x i32> %a to <8 x i64>
+ %ret = sub <8 x i64> %y, %sa
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vmv.v.i v12, 0
+; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT: vwsubu.vv v12, v10, v8
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+ %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ %sa = zext <8 x i32> %a to <8 x i64>
+ %sy = zext <8 x i32> %y to <8 x i64>
+ %ret = sub <8 x i64> %sy, %sa
+ ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vmv.v.i v10, 1
+; CHECK-NEXT: vmerge.vvm v16, v10, v8, v0
+; CHECK-NEXT: vwsub.wv v8, v12, v16
+; CHECK-NEXT: ret
+ %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+ %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %sa = sext <8 x i32> %a to <8 x i64>
+ %ret = sub <8 x i64> %y, %sa
+ ret <8 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
new file mode 100644
index 0000000000000..0cc0063c1d41c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwsub.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+ %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+ %ret = sub <vscale x 8 x i64> %y, %sa
+ ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT: vwsubu.wv v16, v16, v8, v0.t
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+ %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+ %ret = sub <vscale x 8 x i64> %y, %sa
+ ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vmv.v.i v16, 0
+; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: vwsubu.vv v16, v12, v8
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+ %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+ %sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+ %ret = sub <vscale x 8 x i64> %sy, %sa
+ ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 42
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmslt.vx v0, v8, a0
+; CHECK-NEXT: vmv.v.i v12, 1
+; CHECK-NEXT: vmerge.vvm v24, v12, v8, v0
+; CHECK-NEXT: vwsub.wv v8, v16, v24
+; CHECK-NEXT: ret
+ %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+ %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+ %ret = sub <vscale x 8 x i64> %y, %sa
+ ret <vscale x 8 x i64> %ret
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. For a future patch, could we generalize this with llvm::isNeutralConstant
so we can handle vwmul too?
Sounds great! Thank you. |
Isn't this patch for .wv instructions? There is no vwmul.wv. |
oops, you are right. Thanks |
10ccfc8
to
a53a3de
Compare
Extend #78403 and #80079 to support vwsub.wv and vwsubu.wv.
Code
Before this patch
Compiler Explorer
After this patch