-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV] Support Strict FP arithmetic Op when only have Zvfhmin #68867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v Author: Jianjian Guan (jacquesguan) ChangesInclude: STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FSQRT and STRICT_FMA. Patch is 192.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68867.diff 11 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ddc3b94e9c29a58..7e8faaecedfe146 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -179,6 +179,8 @@ class VectorLegalizer {
/// type.
void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
public:
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
@@ -634,6 +636,38 @@ void VectorLegalizer::PromoteSETCC(SDNode *Node,
Results.push_back(Res);
}
+void VectorLegalizer::PromoteSTRICT(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
+ MVT VecVT = Node->getOperand(1).getSimpleValueType();
+ MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
+
+ assert(VecVT.isFloatingPoint());
+
+ SDLoc DL(Node);
+ SmallVector<SDValue, 5> Operands(Node->getNumOperands());
+
+ for (unsigned j = 0; j != Node->getNumOperands(); ++j)
+ if (Node->getOperand(j).getValueType().isVector() &&
+ !(ISD::isVPOpcode(Node->getOpcode()) &&
+ ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
+ // promote the vector operand.
+ Operands[j] =
+ DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+ else
+ Operands[j] = Node->getOperand(j); // Skip VL operand.
+
+ SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
+
+ SDValue Res =
+ DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
+
+ SDValue Res0 = DAG.getNode(ISD::FP_ROUND, DL, VecVT, Res.getValue(0),
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+
+ Results.push_back(Res0);
+ Results.push_back(Res.getValue(1));
+}
+
void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// For a few operations there is a specific concept for promotion based on
// the operand's type.
@@ -674,6 +708,14 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
// Promote the operation by extending the operand.
PromoteSETCC(Node, Results);
return;
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ PromoteSTRICT(Node, Results);
+ return;
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
// These operations are used to do promotion so they can't be promoted
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5cf5ee496656da3..9559849d74cbfd6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -827,12 +827,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// TODO: support more ops.
static const unsigned ZvfhminPromoteOps[] = {
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
- ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
- ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
- ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
- ISD::FMINIMUM};
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
+ ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
+ ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
+ ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
+ ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA};
// TODO: support more vp ops.
static const unsigned ZvfhminPromoteVPOps[] = {
@@ -5452,6 +5453,41 @@ static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
{ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
}
+static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
+
+ assert(Op->isStrictFPOpcode());
+
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
+
+ SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
+ SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
+
+ SDLoc DL(Op);
+
+ SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
+ SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
+
+ for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+ if (!Op.getOperand(j).getValueType().isVector()) {
+ LoOperands[j] = Op.getOperand(j);
+ HiOperands[j] = Op.getOperand(j);
+ continue;
+ }
+ std::tie(LoOperands[j], HiOperands[j]) =
+ DAG.SplitVector(Op.getOperand(j), DL);
+ }
+
+ SDValue LoRes =
+ DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
+ HiOperands[0] = LoRes.getValue(1);
+ SDValue HiRes =
+ DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
+
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
+ LoRes.getValue(0), HiRes.getValue(0));
+ return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -6223,6 +6259,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::STRICT_FDIV:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FMA:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitStrictFPVectorOp(Op, DAG);
return lowerToScalableOp(Op, DAG);
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
index 04ed41cd0952d1c..10cfa5a5cfb9099 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-constrained-sdnode.ll
@@ -1,27 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
declare <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, metadata, metadata)
define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv1f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv1f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv1f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 1 x half> %vc
}
define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%vc = call <vscale x 1 x half> @llvm.experimental.constrained.fadd.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -30,22 +60,48 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b) s
declare <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, metadata, metadata)
define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv2f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv2f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv2f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 2 x half> %vc
}
define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v9, v9, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%vc = call <vscale x 2 x half> @llvm.experimental.constrained.fadd.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -54,22 +110,48 @@ define <vscale x 2 x half> @vfadd_vf_nxv2f16(<vscale x 2 x half> %va, half %b) s
declare <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, metadata, metadata)
define <vscale x 4 x half> @vfadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv4f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv4f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv4f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v10, v12, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 4 x half> %vc
}
define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v10, v10, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%vc = call <vscale x 4 x half> @llvm.experimental.constrained.fadd.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -78,22 +160,48 @@ define <vscale x 4 x half> @vfadd_vf_nxv4f16(<vscale x 4 x half> %va, half %b) s
declare <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, metadata, metadata)
define <vscale x 8 x half> @vfadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv8f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv8f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv8f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v12, v16, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 8 x half> %vc
}
define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v12, v12, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
+; ZVFHMIN-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %b, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%vc = call <vscale x 8 x half> @llvm.experimental.constrained.fadd.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %splat, metadata !"round.dynamic", metadata !"fpexcept.ignore")
@@ -102,22 +210,48 @@ define <vscale x 8 x half> @vfadd_vf_nxv8f16(<vscale x 8 x half> %va, half %b) s
declare <vscale x 16 x half> @llvm.experimental.constrained.fadd.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>, metadata, metadata)
define <vscale x 16 x half> @vfadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb) strictfp {
-; CHECK-LABEL: vfadd_vv_nxv16f16:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfadd.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vv_nxv16f16:
+; ZVFH: # %bb.0: # %entry
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfadd.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vv_nxv16f16:
+; ZVFHMIN: # %bb.0: # %entry
+; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT: ret
entry:
%vc = call <vscale x 16 x half> @llvm.experimental.constrained.fadd.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"round.dynamic", metadata !"fpexcept.ignore")
ret <vscale x 16 x half> %vc
}
define <vscale x 16 x half> @vfadd_vf_nxv16f16(<vscale x 16 x half> %va, half %b) strictfp {
-; CHECK-LABEL: vfadd_vf_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vfadd.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vfadd_vf_nxv16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vfadd.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfadd_vf_nxv16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
+; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
+; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24
+; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
+; ZVFHMIN-NEXT:...
[truncated]
|
ping... |
ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand. | ||
// promote the vector operand. | ||
Operands[j] = | ||
DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why aren't the inserted nodes also STRICT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we use the STRICT extend or round in promotion? I think only the calculation op is constrained. This is not clear for me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think using strict_fp_extend here could help preserve the exception behavior of nodes needed promoted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Promote code in LegalizeDAG.cpp uses STRICT_FP_EXTEND and STRICT_FP_ROUND.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, changed to STRICT_FP_ROUND and STRICT_FP_EXTEND.
96bba2a
to
228a812
Compare
Include: STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FSQRT and STRICT_FMA.
228a812
to
4f32cbe
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Include: STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FSQRT and STRICT_FMA.