[X86][AVX10.2] Map vector saturated converts to public intrinsics #121483

Merged · 11 commits · Jan 5, 2025
Changes from 1 commit
33 changes: 33 additions & 0 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5456,6 +5456,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
}

case X86ISD::FP_TO_SINT_SAT_CUSTOM:
case X86ISD::FP_TO_UINT_SAT_CUSTOM:
if (Subtarget->hasAVX10_2()) {
bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM;
SDValue Op = Node->getOperand(0);
EVT VT = Node->getValueType(0);
EVT OpVT = Op.getValueType();
MachineSDNode *MachineNode;

if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
if (IsSigned)
MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl,
MVT::v4i32, Op);
else
MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl,
MVT::v4i32, Op);
}

if (VT == MVT::v2i64 && OpVT == MVT::v2f64) {
if (IsSigned)
MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl,
MVT::v2i64, Op);
else
MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl,
MVT::v2i64, Op);
}

SDValue NewNode = SDValue(MachineNode, 0);
ReplaceNode(Node, NewNode.getNode());
return;
}
break;

case X86ISD::ANDNP:
if (tryVPTERNLOG(Node))
return;
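The *_SAT_CUSTOM nodes selected above only arise for the narrow cases that are widened earlier in lowering (v2f64 -> v2i32 and v2f32 -> v2i64). A minimal IR sketch that should reach this path on an AVX10.2 target (illustrative only, not taken from the patch; compiled the same way as the RUN lines in the new test file below):

; Illustrative reproducer: v2f64 -> v2i32 is widened in ReplaceNodeResults
; (VFPROUND to v4f32, then FP_TO_SINT_SAT_CUSTOM) before reaching Select().
define <2 x i32> @sat_v2i32_v2f64(<2 x double> %d) {
  %x = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %d)
  ret <2 x i32> %x
}
declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)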
65 changes: 63 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -341,8 +341,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasAVX10_2()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
MVT::v4i64, MVT::v8i64}) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
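With these vector types marked Legal, the generic saturating fptosi/fptoui nodes survive legalization unchanged and can be matched directly by the new TableGen patterns further down. A minimal sketch of one such Legal case (illustrative only, assuming the same -mattr=+avx10.2-512 setup used by the tests):

; Illustrative only: v4f32 -> v4i32 is Legal here and is expected to match
; the new fp_to_sint_sat pattern for VCVTTPS2DQS below.
define <4 x i32> @sat_v4i32_v4f32(<4 x float> %f) {
  %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f)
  ret <4 x i32> %x
}
declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>)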
@@ -2656,6 +2661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::UINT_TO_FP,
ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP,
ISD::FP_TO_SINT_SAT,
ISD::FP_TO_UINT_SAT,
ISD::SETCC,
ISD::MUL,
ISD::XOR,
@@ -33665,6 +33672,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: {
if (!Subtarget.hasAVX10_2())
return;

bool IsSigned = Opc == ISD::FP_TO_SINT_SAT;
EVT VT = N->getValueType(0);
SDValue Op = N->getOperand(0);
EVT OpVT = Op.getValueType();
SDValue V4I32;

if (VT == MVT::v2i32 && OpVT == MVT::v2f64) {
SDValue V4f32 = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Op);
if (IsSigned)
V4I32 =
DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
else
V4I32 =
DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v4i32, V4f32);
Results.push_back(V4I32);
return;
}
break;
}
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
@@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VPERMV3)
NODE_NAME_CASE(VPERMI)
NODE_NAME_CASE(VPTERNLOG)
NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM)
NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM)
NODE_NAME_CASE(VFIXUPIMM)
NODE_NAME_CASE(VFIXUPIMM_SAE)
NODE_NAME_CASE(VFIXUPIMMS)
@@ -56202,6 +56235,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX10_2())
return SDValue();

bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
EVT SrcVT = N->getOperand(0).getValueType();
EVT DstVT = N->getValueType(0);
SDLoc dl(N);

if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
// Convert v2f32 to v2f64
SDValue V2F64 =
DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0));

// Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node
if (IsSigned)
return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
else
return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
}

return SDValue();
}

static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");

@@ -59315,6 +59374,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
// clang-format on
}

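Taken together, the X86ISelLowering.cpp changes route the two type combinations without a direct instruction through the custom nodes: v2f64 -> v2i32 goes through ReplaceNodeResults, and v2f32 -> v2i64 through combineFP_TO_xINT_SAT (FP_EXTEND to v2f64 first). An IR sketch for the latter (illustrative only, not from the patch):

; Illustrative only: v2f32 -> v2i64 is extended to v2f64 in
; combineFP_TO_xINT_SAT and then carried as FP_TO_SINT_SAT_CUSTOM.
define <2 x i64> @sat_v2i64_v2f32(<2 x float> %f) {
  %x = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %f)
  ret <2 x i64> %x
}
declare <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float>)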
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -908,6 +908,10 @@ namespace llvm {
// Load x87 FPU environment from memory.
FLDENVm,

// Custom handling for FP_TO_xINT_SAT
FP_TO_SINT_SAT_CUSTOM,
FP_TO_UINT_SAT_CUSTOM,

/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
56 changes: 56 additions & 0 deletions llvm/lib/Target/X86/X86InstrAVX10.td
@@ -831,6 +831,62 @@ let Predicates = [HasAVX10_2] in {
// patterns have been disabled with null_frag.
// Patterns VCVTTPD2DQSZ128

// VCVTTPD2DQS
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2DQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
(VCVTTPD2DQSZrr VR512:$src)>;

// VCVTTPD2QQS
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
(VCVTTPD2QQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
(VCVTTPD2QQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
(VCVTTPD2QQSZrr VR512:$src)>;

// VCVTTPD2UDQS
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
(VCVTTPD2UDQSZ256rr VR256X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
(VCVTTPD2UDQSZrr VR512:$src)>;

// VCVTTPD2UQQS
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
(VCVTTPD2UQQSZ128rr VR128X:$src)>;
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
(VCVTTPD2UQQSZ256rr VR256X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
(VCVTTPD2UQQSZrr VR512:$src)>;

// VCVTTPS2DQS
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
(VCVTTPS2DQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
(VCVTTPS2DQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2DQSZrr VR512:$src)>;

// VCVTTPS2QQS
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2QQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
(VCVTTPS2QQSZrr VR256X:$src)>;

// VCVTTPS2UDQS
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
(VCVTTPS2UDQSZ128rr VR128X:$src)>;
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
(VCVTTPS2UDQSZ256rr VR256X:$src)>;
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
(VCVTTPS2UDQSZrr VR512:$src)>;

// VCVTTPS2UQQS
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
(VCVTTPS2UQQSZ256rr VR128X:$src)>;
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
(VCVTTPS2UQQSZrr VR256X:$src)>;

def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
(VCVTTPD2DQSZ128rr VR128X:$src)>;
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
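These patterns map the generic fp_to_sint_sat/fp_to_uint_sat nodes straight onto the register forms of the AVX10.2 saturating convert instructions. For example, the first pattern above should cover IR like the following (illustrative sketch, not from the patch):

; Illustrative only: expected to match (v4i32 (fp_to_sint_sat (v4f64 ...), i32)),
; i.e. VCVTTPD2DQSZ256rr.
define <4 x i32> @sat_v4i32_v4f64(<4 x double> %d) {
  %x = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> %d)
  ret <4 x i32> %x
}
declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double>)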
122 changes: 122 additions & 0 deletions llvm/test/CodeGen/X86/avx10_2_512fptosi_satcvtds.ll
@@ -0,0 +1,122 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64

; VCVTTPD2DQS
define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
; X86-LABEL: test_signed_v8i32_v8f64:
; X86: # %bb.0:
; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_signed_v8i32_v8f64:
; X64: # %bb.0:
; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0
; X64-NEXT: retq
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}

; VCVTTPD2QQS
define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
; X86-LABEL: test_signed_v8i64_v8f64:
; X86: # %bb.0:
; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_signed_v8i64_v8f64:
; X64: # %bb.0:
; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0
; X64-NEXT: retq
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
ret <8 x i64> %x
}

; VCVTTPD2UDQS
define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
; X86-LABEL: test_unsigned_v8i32_v8f64:
; X86: # %bb.0:
; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: test_unsigned_v8i32_v8f64:
; X64: # %bb.0:
; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0
; X64-NEXT: retq
%x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}

; VCVTTPD2UQQS
define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
; X86-LABEL: test_unsigned_v8i64_v8f64:
; X86: # %bb.0:
; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_unsigned_v8i64_v8f64:
; X64: # %bb.0:
; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0
; X64-NEXT: retq
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
ret <8 x i64> %x
}

; VCVTTPS2DQS
define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
; X86-LABEL: test_signed_v16i32_v16f32:
; X86: # %bb.0:
; X86-NEXT: vcvttps2dqs %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_signed_v16i32_v16f32:
; X64: # %bb.0:
; X64-NEXT: vcvttps2dqs %zmm0, %zmm0
; X64-NEXT: retq
%x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
ret <16 x i32> %x
}

; VCVTTPS2UDQS
define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
; X86-LABEL: test_unsigned_v16i32_v16f32:
; X86: # %bb.0:
; X86-NEXT: vcvttps2udqs %zmm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_unsigned_v16i32_v16f32:
; X64: # %bb.0:
; X64-NEXT: vcvttps2udqs %zmm0, %zmm0
; X64-NEXT: retq
%x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
ret <16 x i32> %x
}
; VCVTTPS2QQS
define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
; X86-LABEL: test_signed_v8i64_v8f32:
; X86: # %bb.0:
; X86-NEXT: vcvttps2qqs %ymm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_signed_v8i64_v8f32:
; X64: # %bb.0:
; X64-NEXT: vcvttps2qqs %ymm0, %zmm0
; X64-NEXT: retq
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
ret <8 x i64> %x
}

; VCVTTPS2UQQS
define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
; X86-LABEL: test_unsigned_v8i64_v8f32:
; X86: # %bb.0:
; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0
; X86-NEXT: retl
;
; X64-LABEL: test_unsigned_v8i64_v8f32:
; X64: # %bb.0:
; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0
; X64-NEXT: retq
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
ret <8 x i64> %x
}
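This file only exercises the 512-bit (zmm) forms. A test for the 128/256-bit patterns added in X86InstrAVX10.td would follow the same shape; a hypothetical example, not part of this commit:

; Hypothetical 256-bit case: expected to use the ymm form of vcvttps2dqs.
define <8 x i32> @test_signed_v8i32_v8f32(<8 x float> %f) nounwind {
  %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> %f)
  ret <8 x i32> %x
}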