Skip to content

Commit 3095a5d

Browse files
committed
[GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT
This is an implementation of the saturating fp to int conversions for GlobalISel. On AArch64 the conversion instructions work this way, producing saturating results. LegalizerHelper::lowerFPTOINT_SAT is ported from SDAG. AArch64 has a lot of existing tests for fptosi_sat, covering a wide range of types. I have tried to make most of them work all at once, but a few fall back due to other missing features such as f128 handling for min/max.
1 parent 3b426a8 commit 3095a5d

19 files changed

+9907
-4803
lines changed

llvm/docs/GlobalISel/GenericOpcode.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
504504

505505
Convert between integer and floating point.
506506

507+
G_FPTOSI_SAT, G_FPTOUI_SAT
508+
^^^^^^^^^^^^^^^^^^^^^^^^^^
509+
510+
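Saturating conversion from floating point to integer. Out-of-range inputs clamp to the smallest or largest value representable in the destination integer type, and NaN converts to zero.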
511+
507512
G_FABS
508513
^^^^^^
509514

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,8 @@ class GCastOp : public GenericMachineInstr {
823823
case TargetOpcode::G_FPEXT:
824824
case TargetOpcode::G_FPTOSI:
825825
case TargetOpcode::G_FPTOUI:
826+
case TargetOpcode::G_FPTOSI_SAT:
827+
case TargetOpcode::G_FPTOUI_SAT:
826828
case TargetOpcode::G_FPTRUNC:
827829
case TargetOpcode::G_INTTOPTR:
828830
case TargetOpcode::G_PTRTOINT:

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ class LegalizerHelper {
398398
LegalizeResult lowerSITOFP(MachineInstr &MI);
399399
LegalizeResult lowerFPTOUI(MachineInstr &MI);
400400
LegalizeResult lowerFPTOSI(MachineInstr &MI);
401+
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);
401402

402403
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
403404
LegalizeResult lowerFPTRUNC(MachineInstr &MI);

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,16 @@ class MachineIRBuilder {
20002000
return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
20012001
}
20022002

2003+
/// Build and insert \p Dst = G_FPTOUI_SAT \p Src0
2004+
MachineInstrBuilder buildFPTOUI_SAT(const DstOp &Dst, const SrcOp &Src0) {
2005+
return buildInstr(TargetOpcode::G_FPTOUI_SAT, {Dst}, {Src0});
2006+
}
2007+
2008+
/// Build and insert \p Dst = G_FPTOSI_SAT \p Src0
2009+
MachineInstrBuilder buildFPTOSI_SAT(const DstOp &Dst, const SrcOp &Src0) {
2010+
return buildInstr(TargetOpcode::G_FPTOSI_SAT, {Dst}, {Src0});
2011+
}
2012+
20032013
/// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
20042014
MachineInstrBuilder
20052015
buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,12 @@ HANDLE_TARGET_OPCODE(G_SITOFP)
682682
/// Generic unsigned-int to float conversion
683683
HANDLE_TARGET_OPCODE(G_UITOFP)
684684

685+
/// Generic saturating float to signed-int conversion
686+
HANDLE_TARGET_OPCODE(G_FPTOSI_SAT)
687+
688+
/// Generic saturating float to unsigned-int conversion
689+
HANDLE_TARGET_OPCODE(G_FPTOUI_SAT)
690+
685691
/// Generic FP absolute value.
686692
HANDLE_TARGET_OPCODE(G_FABS)
687693

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,18 @@ def G_UITOFP : GenericInstruction {
769769
let hasSideEffects = false;
770770
}
771771

772+
def G_FPTOSI_SAT : GenericInstruction {
773+
let OutOperandList = (outs type0:$dst);
774+
let InOperandList = (ins type1:$src);
775+
let hasSideEffects = false;
776+
}
777+
778+
def G_FPTOUI_SAT : GenericInstruction {
779+
let OutOperandList = (outs type0:$dst);
780+
let InOperandList = (ins type1:$src);
781+
let hasSideEffects = false;
782+
}
783+
772784
def G_FABS : GenericInstruction {
773785
let OutOperandList = (outs type0:$dst);
774786
let InOperandList = (ins type0:$src);

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
9898
def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
9999
def : GINodeEquiv<G_SITOFP, sint_to_fp>;
100100
def : GINodeEquiv<G_UITOFP, uint_to_fp>;
101+
def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat_gi>;
102+
def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat_gi>;
101103
def : GINodeEquiv<G_FADD, fadd>;
102104
def : GINodeEquiv<G_FSUB, fsub>;
103105
def : GINodeEquiv<G_FMA, fma>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,8 @@ def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
569569
def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
570570
def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
571571
def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
572+
def fp_to_sint_sat_gi : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntOp>;
573+
def fp_to_uint_sat_gi : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntOp>;
572574
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
573575
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
574576
def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2340,6 +2340,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
23402340
MachineInstr::copyFlagsFromInstruction(CI));
23412341
return true;
23422342
}
2343+
case Intrinsic::fptosi_sat:
2344+
MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
2345+
getOrCreateVReg(*CI.getArgOperand(0)));
2346+
return true;
2347+
case Intrinsic::fptoui_sat:
2348+
MIRBuilder.buildFPTOUI_SAT(getOrCreateVReg(CI),
2349+
getOrCreateVReg(*CI.getArgOperand(0)));
2350+
return true;
23432351
case Intrinsic::memcpy_inline:
23442352
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
23452353
case Intrinsic::memcpy:

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
18801880
}
18811881
case TargetOpcode::G_FPTOUI:
18821882
case TargetOpcode::G_FPTOSI:
1883+
case TargetOpcode::G_FPTOUI_SAT:
1884+
case TargetOpcode::G_FPTOSI_SAT:
18831885
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
18841886
case TargetOpcode::G_FPEXT:
18851887
if (TypeIdx != 0)
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28722874
else
28732875
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
28742876

2877+
Observer.changedInstr(MI);
2878+
return Legalized;
2879+
case TargetOpcode::G_FPTOSI_SAT:
2880+
case TargetOpcode::G_FPTOUI_SAT:
2881+
Observer.changingInstr(MI);
2882+
2883+
if (TypeIdx == 0) {
2884+
Register OldDst = MI.getOperand(0).getReg();
2885+
LLT Ty = MRI.getType(OldDst);
2886+
Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2887+
Register NewDst;
2888+
MI.getOperand(0).setReg(ExtReg);
2889+
uint64_t ShortBits = Ty.getScalarSizeInBits();
2890+
uint64_t WideBits = WideTy.getScalarSizeInBits();
2891+
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2892+
if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2893+
// z = i16 fptosi_sat(a)
2894+
// ->
2895+
// x = i32 fptosi_sat(a)
2896+
// y = smin(x, 32767)
2897+
// z = smax(y, -32768)
2898+
auto MaxVal = MIRBuilder.buildConstant(
2899+
WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2900+
auto MinVal = MIRBuilder.buildConstant(
2901+
WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
2902+
Register MidReg =
2903+
MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
2904+
NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
2905+
} else {
2906+
// z = i16 fptoui_sat(a)
2907+
// ->
2908+
// x = i32 fptoui_sat(a)
2909+
// y = umin(x, 65535)
2910+
auto MaxVal = MIRBuilder.buildConstant(
2911+
WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
2912+
NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
2913+
}
2914+
MIRBuilder.buildTrunc(OldDst, NewDst);
2915+
} else
2916+
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2917+
28752918
Observer.changedInstr(MI);
28762919
return Legalized;
28772920
case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
41704213
return lowerFPTOUI(MI);
41714214
case G_FPTOSI:
41724215
return lowerFPTOSI(MI);
4216+
case G_FPTOUI_SAT:
4217+
case G_FPTOSI_SAT:
4218+
return lowerFPTOINT_SAT(MI);
41734219
case G_FPTRUNC:
41744220
return lowerFPTRUNC(MI);
41754221
case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
49865032
case G_UITOFP:
49875033
case G_FPTOSI:
49885034
case G_FPTOUI:
5035+
case G_FPTOSI_SAT:
5036+
case G_FPTOUI_SAT:
49895037
case G_INTTOPTR:
49905038
case G_PTRTOINT:
49915039
case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
57775825
case TargetOpcode::G_FPEXT:
57785826
case TargetOpcode::G_FPTOSI:
57795827
case TargetOpcode::G_FPTOUI:
5828+
case TargetOpcode::G_FPTOSI_SAT:
5829+
case TargetOpcode::G_FPTOUI_SAT:
57805830
case TargetOpcode::G_SITOFP:
57815831
case TargetOpcode::G_UITOFP: {
57825832
Observer.changingInstr(MI);
@@ -7285,6 +7335,106 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
72857335
return Legalized;
72867336
}
72877337

7338+
/// Lower G_FPTOSI_SAT / G_FPTOUI_SAT into a non-saturating G_FPTOSI /
/// G_FPTOUI plus explicit clamping and NaN handling.
///
/// The saturation width is the scalar size of the destination type. Inputs
/// below the smallest representable integer produce that integer, inputs
/// above the largest representable integer produce that integer, and NaN
/// produces zero.
///
/// Two strategies are used:
///  * If both integer bounds are exactly representable in the source
///    floating-point format, the source is clamped in the FP domain and then
///    converted.
///  * Otherwise the source is converted directly and the out-of-range cases
///    are patched up with integer selects.
///
/// \p MI is erased on success. Always returns Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth);
    MaxInt = APInt::getMaxValue(SatWidth);
  }

  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
  APFloat MinFloat(Semantics);
  APFloat MaxFloat(Semantics);

  // Round toward zero so that an inexact bound never lies outside the integer
  // range it stands in for.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  // If the integer bounds are exactly representable as floats, emit a
  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
  // and selects.
  if (AreExactFloatBounds) {
    // Clamp Src by MinFloat from below: Max = (Src > MinFloat) ? Src : MinFloat.
    // The ordered compare is false for NaN, so a NaN Src yields MinFloat.
    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
                                     SrcTy.changeElementSize(1), Src, MaxC);
    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
    // Clamp by MaxFloat from above: Min = (Max < MaxFloat) ? Max : MaxFloat.
    // NaN cannot occur here because the previous select removed it.
    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
    auto MinP =
        MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
                             MinC, MachineInstr::FmNoNans);
    auto Min =
        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
    // Convert clamped value to integer. In the unsigned case we're done,
    // because we mapped NaN to MinFloat, which will cast to zero.
    if (!IsSigned) {
      MIRBuilder.buildFPTOUI(Dst, Min);
      MI.eraseFromParent();
      return Legalized;
    }

    // Otherwise, select 0 if Src is NaN.
    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                       DstTy.changeElementSize(1), Src, Src);
    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
                           FpToInt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
                          : MIRBuilder.buildFPTOUI(DstTy, Src);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  auto ULT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
  auto Max = MIRBuilder.buildSelect(
      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
  // If Src OGT MaxFloat, select MaxInt.
  auto OGT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned) {
    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
                           Max, MachineInstr::FmNoNans);
    MI.eraseFromParent();
    return Legalized;
  }

  // Otherwise, select 0 if Src is NaN.
  auto Min = MIRBuilder.buildSelect(
      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                     DstTy.changeElementSize(1), Src, Src);
  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
  MI.eraseFromParent();
  return Legalized;
}
7437+
72887438
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
72897439
LegalizerHelper::LegalizeResult
72907440
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4724,7 +4724,7 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
47244724
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
47254725

47264726
// AArch64's FCVT instructions saturate when out of range.
4727-
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
4727+
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
47284728
let Predicates = [HasFullFP16] in {
47294729
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
47304730
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
@@ -4740,6 +4740,21 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
47404740
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
47414741
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
47424742

4743+
let Predicates = [HasFullFP16] in {
4744+
def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
4745+
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4746+
def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
4747+
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4748+
}
4749+
def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
4750+
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4751+
def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
4752+
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4753+
def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
4754+
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4755+
def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
4756+
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4757+
47434758
let Predicates = [HasFullFP16] in {
47444759
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
47454760
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -4754,10 +4769,25 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
47544769
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
47554770
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
47564771
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4772+
4773+
let Predicates = [HasFullFP16] in {
4774+
def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
4775+
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4776+
def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
4777+
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4778+
}
4779+
def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
4780+
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4781+
def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
4782+
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4783+
def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
4784+
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4785+
def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
4786+
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
47574787
}
47584788

4759-
defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
4760-
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
4789+
defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
4790+
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
47614791

47624792
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
47634793
let Predicates = [HasFullFP16] in {
@@ -5303,22 +5333,34 @@ defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
53035333
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
53045334

53055335
// AArch64's FCVT instructions saturate when out of range.
5306-
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
5336+
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
53075337
let Predicates = [HasFullFP16] in {
53085338
def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
53095339
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
53105340
def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
53115341
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
5342+
5343+
def : Pat<(v4i16 (to_int_sat_gi v4f16:$Rn)),
5344+
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
5345+
def : Pat<(v8i16 (to_int_sat_gi v8f16:$Rn)),
5346+
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
53125347
}
53135348
def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
53145349
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
53155350
def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
53165351
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
53175352
def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
53185353
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
5354+
5355+
def : Pat<(v2i32 (to_int_sat_gi v2f32:$Rn)),
5356+
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
5357+
def : Pat<(v4i32 (to_int_sat_gi v4f32:$Rn)),
5358+
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
5359+
def : Pat<(v2i64 (to_int_sat_gi v2f64:$Rn)),
5360+
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
53195361
}
5320-
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
5321-
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
5362+
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
5363+
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
53225364

53235365
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
53245366
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;

0 commit comments

Comments
 (0)