Skip to content

Commit d87eced

Browse files
committed
[X86] Combine fminnum/fmaxnum with non-nan operand to fmin/fmax
If one operand is known to be non-NaN, place it in the second operand of the X86 FMIN/FMAX node, because the second operand is the value the instruction returns when either operand is NaN.
Differential Revision: https://reviews.llvm.org/D62448
llvm-svn: 361704
1 parent 6bb5041 commit d87eced

File tree

4 files changed

+29
-121
lines changed

4 files changed

+29
-121
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+7-3
Original file line number | Diff line number | Diff line change
@@ -40511,9 +40511,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
4051140511

4051240512
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4051340513

40514-
// TODO: If an operand is already known to be a NaN or not a NaN, this
40515-
// should be an optional swap and FMAX/FMIN.
40516-
4051740514
EVT VT = N->getValueType(0);
4051840515
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
4051940516
(Subtarget.hasSSE2() && VT == MVT::f64) ||
@@ -40530,6 +40527,13 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
4053040527
if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
4053140528
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
4053240529

40530+
// If one of the operands is known non-NaN use the native min/max instructions
40531+
// with the non-NaN input as second operand.
40532+
if (DAG.isKnownNeverNaN(Op1))
40533+
return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
40534+
if (DAG.isKnownNeverNaN(Op0))
40535+
return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());
40536+
4053340537
// If we have to respect NaN inputs, this takes at least 3 instructions.
4053440538
// Favor a library call when operating on a scalar and minimizing code size.
4053540539
if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())

llvm/test/CodeGen/X86/extract-fp.ll

+2-18
Original file line number | Diff line number | Diff line change
@@ -86,16 +86,8 @@ define float @ext_frem_v4f32_constant_op0(<4 x float> %x) {
8686
define float @ext_maxnum_v4f32(<4 x float> %x) nounwind {
8787
; CHECK-LABEL: ext_maxnum_v4f32:
8888
; CHECK: # %bb.0:
89-
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
9089
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
91-
; CHECK-NEXT: movaps %xmm0, %xmm1
92-
; CHECK-NEXT: cmpunordss %xmm0, %xmm1
93-
; CHECK-NEXT: movaps %xmm1, %xmm3
94-
; CHECK-NEXT: andps %xmm2, %xmm3
95-
; CHECK-NEXT: maxss %xmm0, %xmm2
96-
; CHECK-NEXT: andnps %xmm2, %xmm1
97-
; CHECK-NEXT: orps %xmm3, %xmm1
98-
; CHECK-NEXT: movaps %xmm1, %xmm0
90+
; CHECK-NEXT: maxss {{.*}}(%rip), %xmm0
9991
; CHECK-NEXT: retq
10092
%v = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 1.0, float 2.0, float 3.0>)
10193
%r = extractelement <4 x float> %v, i32 2
@@ -105,16 +97,8 @@ define float @ext_maxnum_v4f32(<4 x float> %x) nounwind {
10597
define double @ext_minnum_v2f64(<2 x double> %x) nounwind {
10698
; CHECK-LABEL: ext_minnum_v2f64:
10799
; CHECK: # %bb.0:
108-
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
109100
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
110-
; CHECK-NEXT: movapd %xmm0, %xmm1
111-
; CHECK-NEXT: cmpunordsd %xmm0, %xmm1
112-
; CHECK-NEXT: movapd %xmm1, %xmm3
113-
; CHECK-NEXT: andpd %xmm2, %xmm3
114-
; CHECK-NEXT: minsd %xmm0, %xmm2
115-
; CHECK-NEXT: andnpd %xmm2, %xmm1
116-
; CHECK-NEXT: orpd %xmm3, %xmm1
117-
; CHECK-NEXT: movapd %xmm1, %xmm0
101+
; CHECK-NEXT: minsd {{.*}}(%rip), %xmm0
118102
; CHECK-NEXT: retq
119103
%v = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0.0, double 1.0>, <2 x double> %x)
120104
%r = extractelement <2 x double> %v, i32 1

llvm/test/CodeGen/X86/fmaxnum.ll

+10-50
Original file line number | Diff line number | Diff line change
@@ -472,67 +472,27 @@ define <2 x double> @maxnum_intrinsic_nnan_attr_f64(<2 x double> %a, <2 x double
472472
define float @test_maxnum_const_op1(float %x) {
473473
; SSE-LABEL: test_maxnum_const_op1:
474474
; SSE: # %bb.0:
475-
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
476-
; SSE-NEXT: movaps %xmm0, %xmm1
477-
; SSE-NEXT: cmpunordss %xmm0, %xmm1
478-
; SSE-NEXT: movaps %xmm1, %xmm3
479-
; SSE-NEXT: andps %xmm2, %xmm3
480-
; SSE-NEXT: maxss %xmm0, %xmm2
481-
; SSE-NEXT: andnps %xmm2, %xmm1
482-
; SSE-NEXT: orps %xmm3, %xmm1
483-
; SSE-NEXT: movaps %xmm1, %xmm0
475+
; SSE-NEXT: maxss {{.*}}(%rip), %xmm0
484476
; SSE-NEXT: retq
485477
;
486-
; AVX1-LABEL: test_maxnum_const_op1:
487-
; AVX1: # %bb.0:
488-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
489-
; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
490-
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
491-
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
492-
; AVX1-NEXT: retq
493-
;
494-
; AVX512-LABEL: test_maxnum_const_op1:
495-
; AVX512: # %bb.0:
496-
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
497-
; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1
498-
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
499-
; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
500-
; AVX512-NEXT: vmovaps %xmm1, %xmm0
501-
; AVX512-NEXT: retq
478+
; AVX-LABEL: test_maxnum_const_op1:
479+
; AVX: # %bb.0:
480+
; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0
481+
; AVX-NEXT: retq
502482
%r = call float @llvm.maxnum.f32(float 1.0, float %x)
503483
ret float %r
504484
}
505485

506486
define float @test_maxnum_const_op2(float %x) {
507487
; SSE-LABEL: test_maxnum_const_op2:
508488
; SSE: # %bb.0:
509-
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
510-
; SSE-NEXT: movaps %xmm0, %xmm1
511-
; SSE-NEXT: cmpunordss %xmm0, %xmm1
512-
; SSE-NEXT: movaps %xmm1, %xmm3
513-
; SSE-NEXT: andps %xmm2, %xmm3
514-
; SSE-NEXT: maxss %xmm0, %xmm2
515-
; SSE-NEXT: andnps %xmm2, %xmm1
516-
; SSE-NEXT: orps %xmm3, %xmm1
517-
; SSE-NEXT: movaps %xmm1, %xmm0
489+
; SSE-NEXT: maxss {{.*}}(%rip), %xmm0
518490
; SSE-NEXT: retq
519491
;
520-
; AVX1-LABEL: test_maxnum_const_op2:
521-
; AVX1: # %bb.0:
522-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
523-
; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
524-
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
525-
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
526-
; AVX1-NEXT: retq
527-
;
528-
; AVX512-LABEL: test_maxnum_const_op2:
529-
; AVX512: # %bb.0:
530-
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
531-
; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1
532-
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
533-
; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
534-
; AVX512-NEXT: vmovaps %xmm1, %xmm0
535-
; AVX512-NEXT: retq
492+
; AVX-LABEL: test_maxnum_const_op2:
493+
; AVX: # %bb.0:
494+
; AVX-NEXT: vmaxss {{.*}}(%rip), %xmm0, %xmm0
495+
; AVX-NEXT: retq
536496
%r = call float @llvm.maxnum.f32(float %x, float 1.0)
537497
ret float %r
538498
}

llvm/test/CodeGen/X86/fminnum.ll

+10-50
Original file line number | Diff line number | Diff line change
@@ -472,67 +472,27 @@ define <4 x float> @minnum_intrinsic_nnan_attr_v4f32(<4 x float> %a, <4 x float>
472472
define float @test_minnum_const_op1(float %x) {
473473
; SSE-LABEL: test_minnum_const_op1:
474474
; SSE: # %bb.0:
475-
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
476-
; SSE-NEXT: movaps %xmm0, %xmm1
477-
; SSE-NEXT: cmpunordss %xmm0, %xmm1
478-
; SSE-NEXT: movaps %xmm1, %xmm3
479-
; SSE-NEXT: andps %xmm2, %xmm3
480-
; SSE-NEXT: minss %xmm0, %xmm2
481-
; SSE-NEXT: andnps %xmm2, %xmm1
482-
; SSE-NEXT: orps %xmm3, %xmm1
483-
; SSE-NEXT: movaps %xmm1, %xmm0
475+
; SSE-NEXT: minss {{.*}}(%rip), %xmm0
484476
; SSE-NEXT: retq
485477
;
486-
; AVX1-LABEL: test_minnum_const_op1:
487-
; AVX1: # %bb.0:
488-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
489-
; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm2
490-
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
491-
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
492-
; AVX1-NEXT: retq
493-
;
494-
; AVX512-LABEL: test_minnum_const_op1:
495-
; AVX512: # %bb.0:
496-
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
497-
; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1
498-
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
499-
; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
500-
; AVX512-NEXT: vmovaps %xmm1, %xmm0
501-
; AVX512-NEXT: retq
478+
; AVX-LABEL: test_minnum_const_op1:
479+
; AVX: # %bb.0:
480+
; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
481+
; AVX-NEXT: retq
502482
%r = call float @llvm.minnum.f32(float 1.0, float %x)
503483
ret float %r
504484
}
505485

506486
define float @test_minnum_const_op2(float %x) {
507487
; SSE-LABEL: test_minnum_const_op2:
508488
; SSE: # %bb.0:
509-
; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
510-
; SSE-NEXT: movaps %xmm0, %xmm1
511-
; SSE-NEXT: cmpunordss %xmm0, %xmm1
512-
; SSE-NEXT: movaps %xmm1, %xmm3
513-
; SSE-NEXT: andps %xmm2, %xmm3
514-
; SSE-NEXT: minss %xmm0, %xmm2
515-
; SSE-NEXT: andnps %xmm2, %xmm1
516-
; SSE-NEXT: orps %xmm3, %xmm1
517-
; SSE-NEXT: movaps %xmm1, %xmm0
489+
; SSE-NEXT: minss {{.*}}(%rip), %xmm0
518490
; SSE-NEXT: retq
519491
;
520-
; AVX1-LABEL: test_minnum_const_op2:
521-
; AVX1: # %bb.0:
522-
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
523-
; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm2
524-
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
525-
; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
526-
; AVX1-NEXT: retq
527-
;
528-
; AVX512-LABEL: test_minnum_const_op2:
529-
; AVX512: # %bb.0:
530-
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
531-
; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1
532-
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
533-
; AVX512-NEXT: vmovss %xmm2, %xmm0, %xmm1 {%k1}
534-
; AVX512-NEXT: vmovaps %xmm1, %xmm0
535-
; AVX512-NEXT: retq
492+
; AVX-LABEL: test_minnum_const_op2:
493+
; AVX: # %bb.0:
494+
; AVX-NEXT: vminss {{.*}}(%rip), %xmm0, %xmm0
495+
; AVX-NEXT: retq
536496
%r = call float @llvm.minnum.f32(float %x, float 1.0)
537497
ret float %r
538498
}

0 commit comments

Comments
 (0)