Skip to content

Commit 74ea8fe

Browse files
committed
[RISCV] Handle scalable ops with EEW > 2 dests in combineBinOp_VLToVWBinOp_VL
We can remove the restriction that the narrow type needs to be exactly EEW / 2 for scalable ISD::{ADD,SUB,MUL} nodes. This allows us to perform the combine even if we can't fully fold the extend into the widening op. VP intrinsics already do this, since they are lowered to _VL nodes which don't have this restriction. The "exactly EEW / 2" narrow type restriction prevented us from emitting V{S,Z}EXT_VL nodes with i1 element types which crash when we try to select them, since no other legal type is double the size of i1. So to preserve this, this also restricts the combine to only run after the legalize vector ops phase, at which point all unselectable i1 vectors should be custom lowered away.
1 parent 0ef61ed commit 74ea8fe

File tree

5 files changed

+613
-605
lines changed

5 files changed

+613
-605
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13652,16 +13652,6 @@ struct NodeExtensionHelper {
     if (!VT.isVector())
       break;

-    SDValue NarrowElt = OrigOperand.getOperand(0);
-    MVT NarrowVT = NarrowElt.getSimpleValueType();
-
-    unsigned ScalarBits = VT.getScalarSizeInBits();
-    unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();
-
-    // Ensure the extension's semantic is equivalent to rvv vzext or vsext.
-    if (ScalarBits != NarrowScalarBits * 2)
-      break;
-
     SupportsZExt = Opc == ISD::ZERO_EXTEND;
     SupportsSExt = Opc == ISD::SIGN_EXTEND;

@@ -14112,7 +14102,9 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
                                            TargetLowering::DAGCombinerInfo &DCI,
                                            const RISCVSubtarget &Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
-  if (DCI.isBeforeLegalize())
+  // Don't perform this until types are legalized and any legal i1 types are
+  // custom lowered to avoid introducing unselectable V{S,Z}EXT_VLs.
+  if (DCI.isBeforeLegalizeOps())
     return SDValue();

   if (!NodeExtensionHelper::isSupportedRoot(N))

llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -283,18 +283,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vsext.vf4 v11, v8
-; FOLDING-NEXT:    vsext.vf4 v8, v9
-; FOLDING-NEXT:    vsext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vsext.vf2 v11, v8
+; FOLDING-NEXT:    vsext.vf2 v8, v9
+; FOLDING-NEXT:    vsext.vf2 v9, v10
+; FOLDING-NEXT:    vwmul.vv v10, v11, v8
+; FOLDING-NEXT:    vwadd.vv v8, v11, v9
+; FOLDING-NEXT:    vwsub.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
@@ -563,18 +564,19 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vzext.vf4 v11, v8
-; FOLDING-NEXT:    vzext.vf4 v8, v9
-; FOLDING-NEXT:    vzext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vzext.vf2 v11, v8
+; FOLDING-NEXT:    vzext.vf2 v8, v9
+; FOLDING-NEXT:    vzext.vf2 v9, v10
+; FOLDING-NEXT:    vwmulu.vv v10, v11, v8
+; FOLDING-NEXT:    vwaddu.vv v8, v11, v9
+; FOLDING-NEXT:    vwsubu.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y

0 commit comments

Comments
 (0)