Skip to content

[X86][AVX10.2] Map vector saturated converts to public intrinsics #121483

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 64 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasAVX10_2()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
MVT::v4i64}) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
}
if (Subtarget.hasAVX10_2_512()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v8i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v8i64, Legal);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
Expand Down Expand Up @@ -2656,6 +2665,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::UINT_TO_FP,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::FP_TO_SINT_SAT,
ISD::FP_TO_UINT_SAT,
ISD::SETCC,
ISD::MUL,
ISD::XOR,
Expand Down Expand Up @@ -33665,6 +33676,26 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: {
if (!Subtarget.hasAVX10_2())
return;

bool IsSigned = Opc == ISD::FP_TO_SINT_SAT;
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
SDValue Res;

if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
if (IsSigned)
Res = DAG.getNode(X86ISD::FP_TO_SINT_SAT, dl, MVT::v4i32, Op);
else
Res = DAG.getNode(X86ISD::FP_TO_UINT_SAT, dl, MVT::v4i32, Op);
Results.push_back(Res);
}
return;
}
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
Expand Down Expand Up @@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
NODE_NAME_CASE(FP_TO_SINT_SAT)
NODE_NAME_CASE(FP_TO_UINT_SAT)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
Expand Down Expand Up @@ -56202,6 +56235,33 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

/// Rewrite a v2f32 -> v2i64 ISD::FP_TO_SINT_SAT / ISD::FP_TO_UINT_SAT into the
/// X86-specific saturating-convert node so that it can be selected as
/// VCVTTPS2QQS / VCVTTPS2UQQS.
///
/// The v2f32 source is padded to v4f32 with an undef upper half; the X86ISD
/// node then yields the v2i64 result directly from that v4f32 operand.
///
/// \param N          The FP_TO_SINT_SAT / FP_TO_UINT_SAT node being combined.
/// \param DAG        DAG used to create the replacement nodes.
/// \param Subtarget  Queried for AVX10.2 availability.
/// \returns The replacement node, or an empty SDValue if no rewrite applies.
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  if (!Subtarget.hasAVX10_2())
    return SDValue();

  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = N->getValueType(0);
  if (SrcVT != MVT::v2f32 || DstVT != MVT::v2i64)
    return SDValue();

  // Operand 1 holds the saturation width, which is allowed to be narrower
  // than the result element type. The X86ISD node takes no such operand, so
  // only rewrite when saturation is to the full result element (i64);
  // otherwise the narrower saturation bound would be silently dropped.
  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  if (SatVT != DstVT.getScalarType())
    return SDValue();

  SDLoc dl(N);

  // Pad the v2f32 input with an undef upper half to form the v4f32 operand.
  SDValue NewSrc = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                               DAG.getUNDEF(SrcVT));

  // Keep the signedness of the original node.
  unsigned NewOpc = N->getOpcode() == ISD::FP_TO_SINT_SAT
                        ? X86ISD::FP_TO_SINT_SAT
                        : X86ISD::FP_TO_UINT_SAT;
  return DAG.getNode(NewOpc, dl, DstVT, NewSrc);
}

static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");

Expand Down Expand Up @@ -59315,6 +59375,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
// clang-format on
}

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,10 @@ namespace llvm {
// Load x87 FPU environment from memory.
FLDENVm,

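// Saturating FP-to-integer vector conversions used by the custom handling
// of ISD::FP_TO_xINT_SAT when the source and result element counts differ
// (v2f64 -> v4i32 and v4f32 -> v2i64).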
FP_TO_SINT_SAT,
FP_TO_UINT_SAT,

/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
Expand Down
64 changes: 64 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX10.td
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,70 @@ let Predicates = [HasAVX10_2] in {
// patterns have been disabled with null_frag.
// Patterns VCVTTPD2DQSZ128

// Selection patterns for saturating FP -> integer conversions.
//
// Each group maps fp_to_sint_sat / fp_to_uint_sat onto the register-register
// form of one instruction at its 128-, 256- and 512-bit widths. The trailing
// i32 / i64 operand of the generic node (the saturation type) always matches
// the result element type here.
//
// Two shapes have no generic-node pattern because the source and result
// element counts differ; they match the X86-specific X86fp2sisat / X86fp2uisat
// nodes instead:
//   * v2f64 -> v4i32: created in ReplaceNodeResults for a v2i32 result.
//   * v4f32 -> v2i64: created in combineFP_TO_xINT_SAT from a v2f32 source
//     that was padded to v4f32.

// VCVTTPD2DQS: f64 -> signed i32.
def : Pat<(v4i32(X86fp2sisat(v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
(VCVTTPD2DQSZrr VR512:$src)>;

// VCVTTPD2QQS: f64 -> signed i64.
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
(VCVTTPD2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
(VCVTTPD2QQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
(VCVTTPD2QQSZrr VR512:$src)>;

// VCVTTPD2UDQS: f64 -> unsigned i32.
def : Pat<(v4i32(X86fp2uisat(v2f64 VR128X:$src))),
(VCVTTPD2UDQSZ128rr VR128X:$src)>;
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
(VCVTTPD2UDQSZrr VR512:$src)>;

// VCVTTPD2UQQS: f64 -> unsigned i64.
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
(VCVTTPD2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
(VCVTTPD2UQQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
(VCVTTPD2UQQSZrr VR512:$src)>;

// VCVTTPS2DQS: f32 -> signed i32.
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
(VCVTTPS2DQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
(VCVTTPS2DQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2DQSZrr VR512:$src)>;

// VCVTTPS2QQS: f32 -> signed i64. The source register is half the width of
// the result register (VR128X -> 256-bit form, VR256X -> 512-bit form).
def : Pat<(v2i64(X86fp2sisat(v4f32 VR128X:$src))),
(VCVTTPS2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
(VCVTTPS2QQSZrr VR256X:$src)>;

// VCVTTPS2UDQS: f32 -> unsigned i32.
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
(VCVTTPS2UDQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
(VCVTTPS2UDQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2UDQSZrr VR512:$src)>;

// VCVTTPS2UQQS: f32 -> unsigned i64. The source register is half the width
// of the result register, as for VCVTTPS2QQS.
def : Pat<(v2i64(X86fp2uisat(v4f32 VR128X:$src))),
(VCVTTPS2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
(VCVTTPS2UQQSZrr VR256X:$src)>;


def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,13 @@ def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>,
SDTCisSameAs<1,2>, SDTCisSameAs<1,3>,
SDTCisFP<0>, SDTCisVT<4, i32>]>;

// Type profile shared by the X86 saturating FP-to-integer nodes: one result
// and one operand, both vectors, with an integer result and a floating-point
// operand. Nothing here ties the result's element count to the operand's, so
// shapes such as v4i32 <- v2f64 and v2i64 <- v4f32 satisfy the profile.
def SDTFPToxIntSatOp
: SDTypeProfile<1,
1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1>]>;

// Signed and unsigned variants, bound to the X86ISD::FP_TO_SINT_SAT and
// X86ISD::FP_TO_UINT_SAT opcodes.
def X86fp2sisat : SDNode<"X86ISD::FP_TO_SINT_SAT", SDTFPToxIntSatOp>;
def X86fp2uisat : SDNode<"X86ISD::FP_TO_UINT_SAT", SDTFPToxIntSatOp>;

def X86PAlignr : SDNode<"X86ISD::PALIGNR",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>,
SDTCisSameAs<0,1>,
Expand Down
85 changes: 85 additions & 0 deletions llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefixes=CHECK,X64

; VCVTTPD2DQS
; Signed saturating <8 x double> -> <8 x i32>: a single vcvttpd2dqs from a
; zmm source into a ymm result is expected under both triples.
define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
; CHECK-LABEL: test_signed_v8i32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2dqs %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}

; VCVTTPD2QQS
; Signed saturating <8 x double> -> <8 x i64>: a single vcvttpd2qqs from zmm
; to zmm is expected under both triples.
define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
; CHECK-LABEL: test_signed_v8i64_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2qqs %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
ret <8 x i64> %x
}

; VCVTTPD2UDQS
; Unsigned saturating <8 x double> -> <8 x i32>: a single vcvttpd2udqs from a
; zmm source into a ymm result is expected under both triples.
define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
; CHECK-LABEL: test_unsigned_v8i32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2udqs %zmm0, %ymm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}

; VCVTTPD2UQQS
; Unsigned saturating <8 x double> -> <8 x i64>: a single vcvttpd2uqqs from
; zmm to zmm is expected under both triples.
define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
; CHECK-LABEL: test_unsigned_v8i64_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttpd2uqqs %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
ret <8 x i64> %x
}

; VCVTTPS2DQS
; Signed saturating <16 x float> -> <16 x i32>: a single vcvttps2dqs from zmm
; to zmm is expected under both triples.
define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
; CHECK-LABEL: test_signed_v16i32_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2dqs %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
ret <16 x i32> %x
}

; VCVTTPS2UDQS
; Unsigned saturating <16 x float> -> <16 x i32>: a single vcvttps2udqs from
; zmm to zmm is expected under both triples.
define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
; CHECK-LABEL: test_unsigned_v16i32_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2udqs %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
ret <16 x i32> %x
}
; VCVTTPS2QQS
; Signed saturating <8 x float> -> <8 x i64>: a single vcvttps2qqs from a ymm
; source into a zmm result is expected under both triples.
define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
; CHECK-LABEL: test_signed_v8i64_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2qqs %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
ret <8 x i64> %x
}

; VCVTTPS2UQQS
; Unsigned saturating <8 x float> -> <8 x i64>: a single vcvttps2uqqs from a
; ymm source into a zmm result is expected under both triples.
define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
; CHECK-LABEL: test_unsigned_v8i64_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvttps2uqqs %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
ret <8 x i64> %x
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; X64: {{.*}}
; X86: {{.*}}
Loading
Loading