[X86][AVX10.2] Map vector saturated converts to public intrinsics #121483
Conversation
@llvm/pr-subscribers-backend-x86

Author: None (JaydeepChauhan14)

Changes

Patch is 20.49 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/121483.diff

6 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b340a778b36ad..0dbae94d3f58e7 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5456,6 +5456,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}
+  case X86ISD::FP_TO_SINT_SAT_CUSTOM:
+  case X86ISD::FP_TO_UINT_SAT_CUSTOM:
+    if (Subtarget->hasAVX10_2()) {
+      bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM;
+      SDValue Op = Node->getOperand(0);
+      EVT VT = Node->getValueType(0);
+      EVT OpVT = Op.getValueType();
+      // Initialized to null so an unhandled type combination trips the
+      // assert below instead of replacing with an uninitialized node.
+      MachineSDNode *MachineNode = nullptr;
+
+      if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
+        if (IsSigned)
+          MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl,
+                                               MVT::v4i32, Op);
+        else
+          MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl,
+                                               MVT::v4i32, Op);
+      } else if (VT == MVT::v2i64 && OpVT == MVT::v2f64) {
+        if (IsSigned)
+          MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl,
+                                               MVT::v2i64, Op);
+        else
+          MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl,
+                                               MVT::v2i64, Op);
+      }
+
+      assert(MachineNode && "Unexpected FP_TO_xINT_SAT_CUSTOM types");
+      ReplaceNode(Node, MachineNode);
+      return;
+    }
+    break;
+
case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0514e93d6598b..3364043cda0563 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -341,8 +341,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasAVX10_2()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
+ for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
+ MVT::v4i64, MVT::v8i64}) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
+ }
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
@@ -2656,6 +2661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::UINT_TO_FP,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
+ ISD::FP_TO_SINT_SAT,
+ ISD::FP_TO_UINT_SAT,
ISD::SETCC,
ISD::MUL,
ISD::XOR,
@@ -33665,6 +33672,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: {
+ if (!Subtarget.hasAVX10_2())
+ return;
+
+ bool IsSigned = Opc == ISD::FP_TO_SINT_SAT;
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ SDValue V4I32;
+
+ if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
+ SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op);
+ if (IsSigned)
+ V4I32 =
+ DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
+ else
+ V4I32 =
+ DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
+ Results.push_back(V4I32);
+ return;
+ }
+ break;
+ }
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
@@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
+ NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM)
+ NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
@@ -56202,6 +56235,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
+static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX10_2())
+ return SDValue();
+
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DstVT = N->getValueType(0);
+ SDLoc dl(N);
+
+ if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
+ // Convert v2f32 to v2f64
+ SDValue V2F64 =
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0));
+
+ // Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node
+ if (IsSigned)
+ return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
+ else
+ return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
+ }
+
+ return SDValue();
+}
+
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
@@ -59315,6 +59374,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
// clang-format on
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 2b7a8eaf249d83..0c04cf122bddd9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -908,6 +908,10 @@ namespace llvm {
// Load x87 FPU environment from memory.
FLDENVm,
+ // Custom handling for FP_TO_xINT_SAT
+ FP_TO_SINT_SAT_CUSTOM,
+ FP_TO_UINT_SAT_CUSTOM,
+
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 3bc64eda01a9ce..e373111fe6c008 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -831,6 +831,62 @@ let Predicates = [HasAVX10_2] in {
// patterns have been disabled with null_frag.
// Patterns VCVTTPD2DQSZ128
+// VCVTTPD2DQS
+def : Pat<(v4i32 (fp_to_sint_sat (v4f64 VR256X:$src), i32)),
+          (VCVTTPD2DQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32 (fp_to_sint_sat (v8f64 VR512:$src), i32)),
+          (VCVTTPD2DQSZrr VR512:$src)>;
+
+// VCVTTPD2QQS
+def : Pat<(v2i64 (fp_to_sint_sat (v2f64 VR128X:$src), i64)),
+          (VCVTTPD2QQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64 (fp_to_sint_sat (v4f64 VR256X:$src), i64)),
+          (VCVTTPD2QQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64 (fp_to_sint_sat (v8f64 VR512:$src), i64)),
+          (VCVTTPD2QQSZrr VR512:$src)>;
+
+// VCVTTPD2UDQS
+def : Pat<(v4i32 (fp_to_uint_sat (v4f64 VR256X:$src), i32)),
+          (VCVTTPD2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v8i32 (fp_to_uint_sat (v8f64 VR512:$src), i32)),
+          (VCVTTPD2UDQSZrr VR512:$src)>;
+
+// VCVTTPD2UQQS
+def : Pat<(v2i64 (fp_to_uint_sat (v2f64 VR128X:$src), i64)),
+          (VCVTTPD2UQQSZ128rr VR128X:$src)>;
+def : Pat<(v4i64 (fp_to_uint_sat (v4f64 VR256X:$src), i64)),
+          (VCVTTPD2UQQSZ256rr VR256X:$src)>;
+def : Pat<(v8i64 (fp_to_uint_sat (v8f64 VR512:$src), i64)),
+          (VCVTTPD2UQQSZrr VR512:$src)>;
+
+// VCVTTPS2DQS
+def : Pat<(v4i32 (fp_to_sint_sat (v4f32 VR128X:$src), i32)),
+          (VCVTTPS2DQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32 (fp_to_sint_sat (v8f32 VR256X:$src), i32)),
+          (VCVTTPS2DQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32 (fp_to_sint_sat (v16f32 VR512:$src), i32)),
+          (VCVTTPS2DQSZrr VR512:$src)>;
+
+// VCVTTPS2QQS
+def : Pat<(v4i64 (fp_to_sint_sat (v4f32 VR128X:$src), i64)),
+          (VCVTTPS2QQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64 (fp_to_sint_sat (v8f32 VR256X:$src), i64)),
+          (VCVTTPS2QQSZrr VR256X:$src)>;
+
+// VCVTTPS2UDQS
+def : Pat<(v4i32 (fp_to_uint_sat (v4f32 VR128X:$src), i32)),
+          (VCVTTPS2UDQSZ128rr VR128X:$src)>;
+def : Pat<(v8i32 (fp_to_uint_sat (v8f32 VR256X:$src), i32)),
+          (VCVTTPS2UDQSZ256rr VR256X:$src)>;
+def : Pat<(v16i32 (fp_to_uint_sat (v16f32 VR512:$src), i32)),
+          (VCVTTPS2UDQSZrr VR512:$src)>;
+
+// VCVTTPS2UQQS
+def : Pat<(v4i64 (fp_to_uint_sat (v4f32 VR128X:$src), i64)),
+          (VCVTTPS2UQQSZ256rr VR128X:$src)>;
+def : Pat<(v8i64 (fp_to_uint_sat (v8f32 VR256X:$src), i64)),
+          (VCVTTPS2UQQSZrr VR256X:$src)>;
+
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
diff --git a/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
new file mode 100644
index 00000000000000..70465a28bad6a7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
+
+; VCVTTPD2DQS
+define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_signed_v8i32_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i32_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPD2QQS
+define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_signed_v8i64_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i64_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
+ ret <8 x i64> %x
+}
+
+; VCVTTPD2UDQS
+define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i32_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i32_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPD2UQQS
+define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i64_v8f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i64_v8f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
+ ret <8 x i64> %x
+}
+
+; VCVTTPS2DQS
+define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
+; X86-LABEL: test_signed_v16i32_v16f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2dqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v16i32_v16f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
+ ret <16 x i32> %x
+}
+
+; VCVTTPS2UDQS
+define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v16i32_v16f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2udqs %zmm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v16i32_v16f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs %zmm0, %zmm0
+; X64-NEXT: retq
+ %x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
+ ret <16 x i32> %x
+}
+; VCVTTPS2QQS
+define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_signed_v8i64_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2qqs %ymm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i64_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2qqs %ymm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
+ ret <8 x i64> %x
+}
+
+; VCVTTPS2UQQS
+define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i64_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i64_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0
+; X64-NEXT: retq
+ %x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
+ ret <8 x i64> %x
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
index 494e4bc8e068e4..0c731a09f8dbd2 100644
--- a/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
+++ b/llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll
@@ -112,3 +112,247 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
%x = call i64 @llvm.fptosi.sat.i64.f64(double %f)
ret i64 %x
}
+
+; VCVTTPD2DQS
+define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %d) nounwind {
+; X86-LABEL: test_signed_v2i32_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X86-NEXT: vcvttpd2dqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v2i32_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X64-NEXT: vcvttpd2dqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d)
+ ret <2 x i32> %x
+}
+
+define <4 x i32> @test_signed_v4i32_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_signed_v4i32_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2dqs %ymm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i32_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2dqs %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %f)
+ ret <4 x i32> %x
+}
+
+; VCVTTPD2QQS
+define <2 x i64> @test_signed_v2i64_v2f64(<2 x double> %f) nounwind {
+; X86-LABEL: test_signed_v2i64_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2qqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v2i64_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f64(<2 x double> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_signed_v4i64_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_signed_v4i64_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2qqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i64_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2qqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> %f)
+ ret <4 x i64> %x
+}
+
+; VCVTTPD2UDQS
+define <2 x i32> @test_unsigned_v2i32_v2f64(<2 x double> %d) nounwind {
+; X86-LABEL: test_unsigned_v2i32_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X86-NEXT: vcvttpd2udqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v2i32_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvtpd2ps %xmm0, %xmm0
+; X64-NEXT: vcvttpd2udqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %d)
+ ret <2 x i32> %x
+}
+
+define <4 x i32> @test_unsigned_v4i32_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i32_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2udqs %ymm0, %xmm0
+; X86-NEXT: vzeroupper
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i32_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2udqs %ymm0, %xmm0
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> %f)
+ ret <4 x i32> %x
+}
+
+; VCVTTPD2UQQS
+define <2 x i64> @test_unsigned_v2i64_v2f64(<2 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v2i64_v2f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2uqqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v2i64_v2f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f64(<2 x double> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_unsigned_v4i64_v4f64(<4 x double> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i64_v4f64:
+; X86: # %bb.0:
+; X86-NEXT: vcvttpd2uqqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i64_v4f64:
+; X64: # %bb.0:
+; X64-NEXT: vcvttpd2uqqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> %f)
+ ret <4 x i64> %x
+}
+
+; VCVTTPS2DQS
+define <4 x i32> @test_signed_v4i32_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_signed_v4i32_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2dqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i32_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
+ ret <4 x i32> %x
+}
+
+define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_signed_v8i32_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2dqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v8i32_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2dqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPS2UDQS
+define <4 x i32> @test_unsigned_v4i32_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v4i32_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2udqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v4i32_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f)
+ ret <4 x i32> %x
+}
+
+define <8 x i32> @test_unsigned_v8i32_v8f32(<8 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v8i32_v8f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2udqs %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v8i32_v8f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2udqs %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> %f)
+ ret <8 x i32> %x
+}
+
+; VCVTTPS2QQS
+define <2 x i64> @test_signed_v2i64_v2f32(<2 x float> %f) nounwind {
+; X86-LABEL: test_signed_v2i64_v2f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvtps2pd %xmm0, %xmm0
+; X86-NEXT: vcvttps2qqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v2i64_v2f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtps2pd %xmm0, %xmm0
+; X64-NEXT: vcvttps2qqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
+ ret <2 x i64> %x
+}
+
+define <4 x i64> @test_signed_v4i64_v4f32(<4 x float> %f) nounwind {
+; X86-LABEL: test_signed_v4i64_v4f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvttps2qqs %xmm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_signed_v4i64_v4f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvttps2qqs %xmm0, %ymm0
+; X64-NEXT: retq
+ %x = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> %f)
+ ret <4 x i64> %x
+}
+
+; VCVTTPS2UQQS
+define <2 x i64> @test_unsigned_v2i64_v2f32(<2 x float> %f) nounwind {
+; X86-LABEL: test_unsigned_v2i64_v2f32:
+; X86: # %bb.0:
+; X86-NEXT: vcvtps2pd %xmm0, %xmm0
+; X86-NEXT: vcvttps2uqqs %xmm0, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: test_unsigned_v2i64_v2f32:
+; X64: # %bb.0:
+; X64-NEXT: vcvtps2pd %xmm0, %xmm0
+; X64-NEXT: vcvttps2uqqs %xmm0, %xmm0
+; X64-NEXT: retq
+ %x = call ...
[truncated]
@phoebewang please review.
Please can you add a description of the purpose of this patch?
✅ With the latest revision this PR passed the undef deprecator. |
LGTM except one comment.
LLVM already has support for the AVX10.2 saturating-convert instructions. This patch maps the public LLVM intrinsics llvm.fptosi.sat and llvm.fptoui.sat onto those instructions. It covers conversions from float and double vectors to signed and unsigned integer vectors.
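A minimal before/after sketch, adapted from the tests added in this patch (the function name test_sat_v8i64 is our own; the intrinsic and the expected instruction come from avx10_2_512fptosi_satcvtds.ll):

; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512
define <8 x i64> @test_sat_v8i64(<8 x double> %f) nounwind {
  ; With this patch the saturating convert selects to a single instruction:
  ;   vcvttpd2qqs %zmm0, %zmm0
  %x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
  ret <8 x i64> %x
}

Without these patterns, the generic legalizer expands llvm.fptosi.sat into a plain convert wrapped in a compare-and-select clamp sequence, so mapping the intrinsics directly onto the AVX10.2 saturating converts saves both code size and latency.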