-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[X86][AVX10.2] Lower fminimum/fmaximum to VMINMAX* #121373
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) Changes: Patch is 24.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121373.diff 4 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e7f6032ee7d749..a0514e93d6598b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2442,6 +2442,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, VT, Legal);
setOperationAction(ISD::FMA, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
}
if (Subtarget.hasAVX10_2_512()) {
setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
@@ -2451,6 +2453,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::v32bf16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::v32bf16, Custom);
}
for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
setCondCodeAction(ISD::SETOEQ, VT, Custom);
@@ -28842,6 +28846,20 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
SDLoc DL(Op);
+ if (Subtarget.hasAVX10_2() && TLI.isTypeLegal(VT)) {
+ unsigned Opc = 0;
+ if (VT.isVector())
+ Opc = X86ISD::VMINMAX;
+ else if (VT == MVT::f16 || VT == MVT::f32 || VT == MVT::f64)
+ Opc = X86ISD::VMINMAXS;
+
+ if (Opc) {
+ SDValue Imm =
+ DAG.getTargetConstant(Op.getOpcode() == ISD::FMAXIMUM, DL, MVT::i32);
+ return DAG.getNode(Opc, DL, VT, X, Y, Imm, Op->getFlags());
+ }
+ }
+
uint64_t SizeInBits = VT.getScalarSizeInBits();
APInt PreferredZero = APInt::getZero(SizeInBits);
APInt OppositeZero = PreferredZero;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 0301c07dfb540b..3bc64eda01a9ce 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -403,28 +403,42 @@ multiclass avx10_minmax_scalar<string OpStr, X86VectorVTInfo _, SDNode OpNode,
SDNode OpNodeSAE> {
let ExeDomain = _.ExeDomain, Predicates = [HasAVX10_2] in {
let mayRaiseFPException = 1 in {
- defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>;
-
- defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
- (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
- OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
- (i32 timm:$src3)))>,
+ let isCodeGenOnly = 1 in {
+ def rri : AVX512Ii8<0x53, MRMSrcReg, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX]>;
+
+ def rmi : AVX512Ii8<0x53, MRMSrcMem, (outs _.FRC:$dst),
+ (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2),
+ (i32 timm:$src3)))]>,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ }
+ defm rri_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3)))>,
+ Sched<[WriteFMAX]>;
+
+ defm rmi_Int : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+ OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
+ (i32 timm:$src3)))>,
Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in
- defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
- (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
- OpStr, "$src3, {sae}, $src2, $src1",
- "$src1, $src2, {sae}, $src3",
- (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
- (i32 timm:$src3)))>,
- Sched<[WriteFMAX]>, EVEX_B;
+ defm rrib_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3),
+ OpStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
+ (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
+ (i32 timm:$src3)))>,
+ Sched<[WriteFMAX]>, EVEX_B;
}
}
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index c6da0c5ca4792c..1dcce5336895f0 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX10_2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86
declare float @llvm.maximum.f32(float, float)
@@ -73,6 +74,11 @@ define float @test_fmaximum(float %x, float %y) nounwind {
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -110,6 +116,11 @@ define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_scalarize:
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
@@ -129,6 +140,11 @@ define float @test_fmaximum_nan0(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nan0:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -148,6 +164,11 @@ define float @test_fmaximum_nan1(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nan1:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -215,6 +236,13 @@ define float @test_fmaximum_nnan(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vaddss %xmm1, %xmm0, %xmm2
+; AVX10_2-NEXT: vsubss %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxss $1, %xmm0, %xmm2
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nnan:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -272,6 +300,12 @@ define double @test_fmaximum_zero0(double %x, double %y) nounwind {
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: vminmaxsd $1, %xmm0, %xmm1
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero0:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -323,6 +357,12 @@ define double @test_fmaximum_zero1(double %x, double %y) nounwind {
; AVX512-NEXT: vmovapd %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxsd $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -354,6 +394,11 @@ define double @test_fmaximum_zero2(double %x, double %y) {
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_zero2:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_zero2:
; X86: # %bb.0:
; X86-NEXT: fldz
@@ -390,6 +435,11 @@ define float @test_fmaximum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="t
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_nsz:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_nsz:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -474,6 +524,12 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vmaxss %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_combine_cmps:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxss $1, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_combine_cmps:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -562,6 +618,11 @@ define float @test_fminimum(float %x, float %y) nounwind {
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -599,6 +660,11 @@ define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_scalarize:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_scalarize:
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
@@ -618,6 +684,11 @@ define float @test_fminimum_nan0(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nan0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nan0:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -637,6 +708,11 @@ define float @test_fminimum_nan1(float %x, float %y) {
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nan1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nan1:
; X86: # %bb.0:
; X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
@@ -695,6 +771,11 @@ define double @test_fminimum_nnan(double %x, double %y) "no-nans-fp-math"="true"
; AVX512DQ-NEXT: vminsd %xmm2, %xmm1, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nnan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nnan:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -749,6 +830,11 @@ define double @test_fminimum_zero0(double %x, double %y) nounwind {
; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero0:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero0:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -796,6 +882,11 @@ define double @test_fminimum_zero1(double %x, double %y) nounwind {
; AVX512-NEXT: vmovapd %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero1:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxsd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero1:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
@@ -826,6 +917,11 @@ define double @test_fminimum_zero2(double %x, double %y) {
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_zero2:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vmovsd {{.*#+}} xmm0 = [-0.0E+0,0.0E+0]
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_zero2:
; X86: # %bb.0:
; X86-NEXT: fldz
@@ -863,6 +959,11 @@ define float @test_fminimum_nsz(float %x, float %y) nounwind {
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_nsz:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_nsz:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -948,6 +1049,12 @@ define float @test_fminimum_combine_cmps(float %x, float %y) nounwind {
; AVX512DQ-NEXT: vminss %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_combine_cmps:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vdivss %xmm0, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxss $0, %xmm1, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_combine_cmps:
; X86: # %bb.0:
; X86-NEXT: pushl %eax
@@ -1009,6 +1116,11 @@ define <2 x double> @test_fminimum_vector(<2 x double> %x, <2 x double> %y) {
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector:
; X86: # %bb.0:
; X86-NEXT: vblendvpd %xmm0, %xmm0, %xmm1, %xmm2
@@ -1032,6 +1144,11 @@ define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nan
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector:
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
@@ -1054,6 +1171,12 @@ define <2 x double> @test_fminimum_vector_zero(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_zero:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1077,6 +1200,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_signed_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_signed_zero:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
@@ -1102,6 +1230,13 @@ define <2 x double> @test_fminimum_vector_partially_zero(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_partially_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_partially_zero:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1149,6 +1284,13 @@ define <2 x double> @test_fminimum_vector_different_zeros(<2 x double> %x) {
; AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_different_zeros:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_different_zeros:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1177,6 +1319,11 @@ define <4 x float> @test_fmaximum_vector_non_zero(<4 x float> %x) {
; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fmaximum_vector_non_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxps $1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fmaximum_vector_non_zero:
; X86: # %bb.0:
; X86-NEXT: vmovaps {{.*#+}} xmm1 = [5.0E+0,4.0E+0,3.0E+0,2.0E+0]
@@ -1206,6 +1353,13 @@ define <2 x double> @test_fminimum_vector_nan(<2 x double> %x) {
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_nan:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_nan:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1232,6 +1386,12 @@ define <2 x double> @test_fminimum_vector_zero_first(<2 x double> %x) {
; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_zero_first:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX10_2-NEXT: vminmaxpd $0, %xmm1, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_zero_first:
; X86: # %bb.0:
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
@@ -1260,6 +1420,11 @@ define <2 x double> @test_fminimum_vector_signed_zero(<2 x double> %x) {
; AVX-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
; AVX-NEXT: retq
;
+; AVX10_2-LABEL: test_fminimum_vector_signed_zero:
+; AVX10_2: # %bb.0:
+; AVX10_2-NEXT: vminmaxpd $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
+; AVX10_2-NEXT: retq
+;
; X86-LABEL: test_fminimum_vector_signed_zero:
; X86: # %bb.0:
; X86-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm1
@@ -1284,6 +1449,11 @@ define <4 x float> @test_fmaximum_vector_signed_zero_first(<4 x float> %x) {
; AVX-NEXT...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
{X86::VMINMAXSDrrik, X86::VMINMAXSDrmik, TB_NO_REVERSE}, | ||
{X86::VMINMAXSHrrik, X86::VMINMAXSHrmik, TB_NO_REVERSE}, | ||
{X86::VMINMAXSSrrik, X86::VMINMAXSSrmik, TB_NO_REVERSE}, | ||
{X86::VMINMAXSDrri_Intk, X86::VMINMAXSDrmi_Intk, TB_NO_REVERSE}, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Slightly off topic: it looks like having a pattern named `rri_Intk` is just a consequence of adding `_Int` in the wrong place. Should we generally refactor all patterns that introduce more suffixes after `_Int` so that `_Int` is always last? Or is the current position intended to show at which level an intrinsic was introduced (I assumed `Int` stands for "intrinsic")?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving `_Int` to the end looks good to me. Here you go: #121450
Address comment at #121373 (comment)
No description provided.