Skip to content

Commit 1df5c83

Browse files
committed
[GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT
This is an implementation of the saturating fp to int conversions for GlobalISel. On AArch64 the conversion instructions work this way, producing saturating results. LegalizerHelper::lowerFPTOINT_SAT is ported from SDAG. AArch64 has a lot of existing tests for fptosi_sat, covering a wide range of types. I have tried to make most of them work all at once, but a few fall back due to other missing features such as f128 handling for min/max.
1 parent 1642f64 commit 1df5c83

18 files changed

+9906
-4802
lines changed

llvm/docs/GlobalISel/GenericOpcode.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
504504

505505
Convert between integer and floating point.
506506

507+
G_FPTOSI_SAT, G_FPTOUI_SAT
508+
^^^^^^^^^^^^^^^^^^^^^^^^^^
509+
510+
Saturating conversion from floating point to integer.
511+
507512
G_FABS
508513
^^^^^^
509514

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,8 @@ class GCastOp : public GenericMachineInstr {
823823
case TargetOpcode::G_FPEXT:
824824
case TargetOpcode::G_FPTOSI:
825825
case TargetOpcode::G_FPTOUI:
826+
case TargetOpcode::G_FPTOSI_SAT:
827+
case TargetOpcode::G_FPTOUI_SAT:
826828
case TargetOpcode::G_FPTRUNC:
827829
case TargetOpcode::G_INTTOPTR:
828830
case TargetOpcode::G_PTRTOINT:

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ class LegalizerHelper {
398398
LegalizeResult lowerSITOFP(MachineInstr &MI);
399399
LegalizeResult lowerFPTOUI(MachineInstr &MI);
400400
LegalizeResult lowerFPTOSI(MachineInstr &MI);
401+
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);
401402

402403
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
403404
LegalizeResult lowerFPTRUNC(MachineInstr &MI);

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2035,6 +2035,16 @@ class MachineIRBuilder {
20352035
return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
20362036
}
20372037

2038+
/// Build and insert \p Dst = G_FPTOUI_SAT \p Src0
///
/// G_FPTOUI_SAT is the generic saturating float to unsigned-int conversion.
/// \return a MachineInstrBuilder for the newly created instruction.
2039+
MachineInstrBuilder buildFPTOUI_SAT(const DstOp &Dst, const SrcOp &Src0) {
2040+
return buildInstr(TargetOpcode::G_FPTOUI_SAT, {Dst}, {Src0});
2041+
}
2042+
2043+
/// Build and insert \p Dst = G_FPTOSI_SAT \p Src0
///
/// G_FPTOSI_SAT is the generic saturating float to signed-int conversion.
/// \return a MachineInstrBuilder for the newly created instruction.
2044+
MachineInstrBuilder buildFPTOSI_SAT(const DstOp &Dst, const SrcOp &Src0) {
2045+
return buildInstr(TargetOpcode::G_FPTOSI_SAT, {Dst}, {Src0});
2046+
}
2047+
20382048
/// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
20392049
MachineInstrBuilder
20402050
buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,12 @@ HANDLE_TARGET_OPCODE(G_SITOFP)
684684
/// Generic unsigned-int to float conversion
685685
HANDLE_TARGET_OPCODE(G_UITOFP)
686686

687+
/// Generic saturating float to signed-int conversion
688+
HANDLE_TARGET_OPCODE(G_FPTOSI_SAT)
689+
690+
/// Generic saturating float to unsigned-int conversion
691+
HANDLE_TARGET_OPCODE(G_FPTOUI_SAT)
692+
687693
/// Generic FP absolute value.
688694
HANDLE_TARGET_OPCODE(G_FABS)
689695

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,18 @@ def G_UITOFP : GenericInstruction {
769769
let hasSideEffects = false;
770770
}
771771

772+
// Generic saturating float to signed-int conversion. The result type (type0)
// and the source type (type1) are independent type indices.
def G_FPTOSI_SAT : GenericInstruction {
773+
let OutOperandList = (outs type0:$dst);
774+
let InOperandList = (ins type1:$src);
775+
let hasSideEffects = false;
776+
}
777+
778+
// Generic saturating float to unsigned-int conversion. The result type (type0)
// and the source type (type1) are independent type indices.
def G_FPTOUI_SAT : GenericInstruction {
779+
let OutOperandList = (outs type0:$dst);
780+
let InOperandList = (ins type1:$src);
781+
let hasSideEffects = false;
782+
}
783+
772784
def G_FABS : GenericInstruction {
773785
let OutOperandList = (outs type0:$dst);
774786
let InOperandList = (ins type0:$src);

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
9898
def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
9999
def : GINodeEquiv<G_SITOFP, sint_to_fp>;
100100
def : GINodeEquiv<G_UITOFP, uint_to_fp>;
101+
def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat_gi>;
102+
def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat_gi>;
101103
def : GINodeEquiv<G_FADD, fadd>;
102104
def : GINodeEquiv<G_FSUB, fsub>;
103105
def : GINodeEquiv<G_FMA, fma>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,8 @@ def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>;
569569
def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>;
570570
def fp_to_sint_sat : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntSatOp>;
571571
def fp_to_uint_sat : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntSatOp>;
572+
def fp_to_sint_sat_gi : SDNode<"ISD::FP_TO_SINT_SAT" , SDTFPToIntOp>;
573+
def fp_to_uint_sat_gi : SDNode<"ISD::FP_TO_UINT_SAT" , SDTFPToIntOp>;
572574
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
573575
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
574576
def bf16_to_fp : SDNode<"ISD::BF16_TO_FP" , SDTIntToFPOp>;

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2340,6 +2340,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
23402340
MachineInstr::copyFlagsFromInstruction(CI));
23412341
return true;
23422342
}
2343+
case Intrinsic::fptosi_sat:
2344+
MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
2345+
getOrCreateVReg(*CI.getArgOperand(0)));
2346+
return true;
2347+
case Intrinsic::fptoui_sat:
2348+
MIRBuilder.buildFPTOUI_SAT(getOrCreateVReg(CI),
2349+
getOrCreateVReg(*CI.getArgOperand(0)));
2350+
return true;
23432351
case Intrinsic::memcpy_inline:
23442352
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
23452353
case Intrinsic::memcpy:

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
18801880
}
18811881
case TargetOpcode::G_FPTOUI:
18821882
case TargetOpcode::G_FPTOSI:
1883+
case TargetOpcode::G_FPTOUI_SAT:
1884+
case TargetOpcode::G_FPTOSI_SAT:
18831885
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
18841886
case TargetOpcode::G_FPEXT:
18851887
if (TypeIdx != 0)
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28722874
else
28732875
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
28742876

2877+
Observer.changedInstr(MI);
2878+
return Legalized;
2879+
case TargetOpcode::G_FPTOSI_SAT:
2880+
case TargetOpcode::G_FPTOUI_SAT:
2881+
Observer.changingInstr(MI);
2882+
2883+
if (TypeIdx == 0) {
2884+
Register OldDst = MI.getOperand(0).getReg();
2885+
LLT Ty = MRI.getType(OldDst);
2886+
Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2887+
Register NewDst;
2888+
MI.getOperand(0).setReg(ExtReg);
2889+
uint64_t ShortBits = Ty.getScalarSizeInBits();
2890+
uint64_t WideBits = WideTy.getScalarSizeInBits();
2891+
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2892+
if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2893+
// z = i16 fptosi_sat(a)
2894+
// ->
2895+
// x = i32 fptosi_sat(a)
2896+
// y = smin(x, 32767)
2897+
// z = smax(y, -32768)
2898+
auto MaxVal = MIRBuilder.buildConstant(
2899+
WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2900+
auto MinVal = MIRBuilder.buildConstant(
2901+
WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
2902+
Register MidReg =
2903+
MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
2904+
NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
2905+
} else {
2906+
// z = i16 fptoui_sat(a)
2907+
// ->
2908+
// x = i32 fptoui_sat(a)
2909+
// y = umin(x, 65535)
2910+
auto MaxVal = MIRBuilder.buildConstant(
2911+
WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
2912+
NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
2913+
}
2914+
MIRBuilder.buildTrunc(OldDst, NewDst);
2915+
} else
2916+
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2917+
28752918
Observer.changedInstr(MI);
28762919
return Legalized;
28772920
case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
41704213
return lowerFPTOUI(MI);
41714214
case G_FPTOSI:
41724215
return lowerFPTOSI(MI);
4216+
case G_FPTOUI_SAT:
4217+
case G_FPTOSI_SAT:
4218+
return lowerFPTOINT_SAT(MI);
41734219
case G_FPTRUNC:
41744220
return lowerFPTRUNC(MI);
41754221
case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
49865032
case G_UITOFP:
49875033
case G_FPTOSI:
49885034
case G_FPTOUI:
5035+
case G_FPTOSI_SAT:
5036+
case G_FPTOUI_SAT:
49895037
case G_INTTOPTR:
49905038
case G_PTRTOINT:
49915039
case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
57775825
case TargetOpcode::G_FPEXT:
57785826
case TargetOpcode::G_FPTOSI:
57795827
case TargetOpcode::G_FPTOUI:
5828+
case TargetOpcode::G_FPTOSI_SAT:
5829+
case TargetOpcode::G_FPTOUI_SAT:
57805830
case TargetOpcode::G_SITOFP:
57815831
case TargetOpcode::G_UITOFP: {
57825832
Observer.changingInstr(MI);
@@ -7285,6 +7335,106 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
72857335
return Legalized;
72867336
}
72877337

7338+
/// Lower G_FPTOSI_SAT / G_FPTOUI_SAT into a non-saturating G_FPTOSI /
/// G_FPTOUI plus explicit clamping.
///
/// The saturation width is the scalar size of the destination type. Two
/// strategies are used:
///  * If both integer bounds are exactly representable in the source
///    floating-point format, the source is clamped in the floating-point
///    domain (compare + select) and then converted.
///  * Otherwise the source is converted directly and the out-of-range cases
///    are patched up afterwards with compares against the (rounded toward
///    zero) float bounds and selects of the integer bounds.
/// In both strategies a NaN input produces 0.
///
/// \param MI the G_FPTOSI_SAT or G_FPTOUI_SAT instruction; it is erased.
/// \return Legalized.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth);
    MaxInt = APInt::getMaxValue(SatWidth);
  }

  const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
  APFloat MinFloat(Semantics);
  APFloat MaxFloat(Semantics);

  // Round toward zero so that an inexact float bound never lies outside the
  // integer range.
  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  // If the integer bounds are exactly representable as floats, emit a
  // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
  // and selects.
  if (AreExactFloatBounds) {
    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    // This must be an ordered greater-than: an unordered or less-than
    // predicate would keep NaN and would select the smaller operand.
    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
    auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
                                     SrcTy.changeElementSize(1), Src, MaxC);
    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
    // Clamp by MaxFloat from above. NaN cannot occur.
    // Keep the already-clamped value only while it is below MaxFloat.
    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
    auto MinP =
        MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
                             MinC, MachineInstr::FmNoNans);
    auto Min =
        MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
    // Convert clamped value to integer. In the unsigned case we're done,
    // because we mapped NaN to MinFloat, which will cast to zero.
    if (!IsSigned) {
      MIRBuilder.buildFPTOUI(Dst, Min);
      MI.eraseFromParent();
      return Legalized;
    }

    // Otherwise, select 0 if Src is NaN.
    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
    auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                       DstTy.changeElementSize(1), Src, Src);
    MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
                           FpToInt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
                          : MIRBuilder.buildFPTOUI(DstTy, Src);

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  auto ULT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MinFloat));
  auto Max = MIRBuilder.buildSelect(
      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
  // If Src OGT MaxFloat, select MaxInt.
  auto OGT =
      MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
                           MIRBuilder.buildFConstant(SrcTy, MaxFloat));

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned) {
    MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
                           Max, MachineInstr::FmNoNans);
    MI.eraseFromParent();
    return Legalized;
  }

  // Otherwise, select 0 if Src is NaN.
  auto Min = MIRBuilder.buildSelect(
      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
  auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
                                     DstTy.changeElementSize(1), Src, Src);
  MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
  MI.eraseFromParent();
  return Legalized;
}
7437+
72887438
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
72897439
LegalizerHelper::LegalizeResult
72907440
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4729,7 +4729,7 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
47294729
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
47304730

47314731
// AArch64's FCVT instructions saturate when out of range.
4732-
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
4732+
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
47334733
let Predicates = [HasFullFP16] in {
47344734
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
47354735
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
@@ -4745,6 +4745,21 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
47454745
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
47464746
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
47474747

4748+
let Predicates = [HasFullFP16] in {
4749+
def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
4750+
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4751+
def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
4752+
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4753+
}
4754+
def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
4755+
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4756+
def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
4757+
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4758+
def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
4759+
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4760+
def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
4761+
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4762+
47484763
let Predicates = [HasFullFP16] in {
47494764
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
47504765
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
@@ -4759,10 +4774,25 @@ multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
47594774
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
47604775
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
47614776
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4777+
4778+
let Predicates = [HasFullFP16] in {
4779+
def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
4780+
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4781+
def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
4782+
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4783+
}
4784+
def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
4785+
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4786+
def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
4787+
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4788+
def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
4789+
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4790+
def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
4791+
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
47624792
}
47634793

4764-
defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
4765-
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
4794+
defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
4795+
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
47664796

47674797
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
47684798
let Predicates = [HasFullFP16] in {
@@ -5308,22 +5338,34 @@ defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
53085338
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
53095339

53105340
// AArch64's FCVT instructions saturate when out of range.
5311-
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
5341+
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
53125342
let Predicates = [HasFullFP16] in {
53135343
def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
53145344
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
53155345
def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
53165346
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
5347+
5348+
def : Pat<(v4i16 (to_int_sat_gi v4f16:$Rn)),
5349+
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
5350+
def : Pat<(v8i16 (to_int_sat_gi v8f16:$Rn)),
5351+
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
53175352
}
53185353
def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
53195354
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
53205355
def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
53215356
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
53225357
def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
53235358
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
5359+
5360+
def : Pat<(v2i32 (to_int_sat_gi v2f32:$Rn)),
5361+
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
5362+
def : Pat<(v4i32 (to_int_sat_gi v4f32:$Rn)),
5363+
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
5364+
def : Pat<(v2i64 (to_int_sat_gi v2f64:$Rn)),
5365+
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
53245366
}
5325-
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
5326-
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
5367+
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
5368+
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
53275369

53285370
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
53295371
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;

0 commit comments

Comments
 (0)