Skip to content

Commit 7bff377

Browse files
committed
[SDAG] Check fminnum/fmaxnum for non-zero operand.
Currently, in TargetLowering, if the target does not support fminnum/fmaxnum, we lower to fminimum/fmaximum when neither operand can be a NaN. This isn't quite correct, because fminnum and fminimum treat +/-0 differently; so we also need to prove that at least one of the operands isn't a zero, or that there are no signed zeros. Differential Revision: https://reviews.llvm.org/D143256
1 parent a7de5c8 commit 7bff377

File tree

7 files changed

+426
-91
lines changed

7 files changed

+426
-91
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4918,9 +4918,21 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
49184918
"Floating point type expected");
49194919

49204920
// If the value is a constant, we can obviously see if it is a zero or not.
4921-
// TODO: Add BuildVector support.
49224921
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
49234922
return !C->isZero();
4923+
4924+
// Return false if we find any zero in a vector.
4925+
if (Op->getOpcode() == ISD::BUILD_VECTOR ||
4926+
Op->getOpcode() == ISD::SPLAT_VECTOR) {
4927+
for (const SDValue &OpVal : Op->op_values()) {
4928+
if (OpVal.isUndef())
4929+
return false;
4930+
if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
4931+
if (C->isZero())
4932+
return false;
4933+
}
4934+
return true;
4935+
}
49244936
return false;
49254937
}
49264938

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7985,14 +7985,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
79857985
}
79867986

79877987
// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
7988-
// instead if there are no NaNs.
7989-
if (Node->getFlags().hasNoNaNs()) {
7988+
// instead if there are no NaNs and there can't be an incompatible zero
7989+
// compare: at least one operand isn't +/-0, or there are no signed-zeros.
7990+
if (Node->getFlags().hasNoNaNs() &&
7991+
(Node->getFlags().hasNoSignedZeros() ||
7992+
DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
7993+
DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
79907994
unsigned IEEE2018Op =
79917995
Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
7992-
if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
7996+
if (isOperationLegalOrCustom(IEEE2018Op, VT))
79937997
return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
79947998
Node->getOperand(1), Node->getFlags());
7995-
}
79967999
}
79978000

79988001
if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))

llvm/test/CodeGen/ARM/lower-vmax.ll

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
12
; RUN: llc -mtriple=arm-eabihf -mattr=+neon < %s | FileCheck -check-prefixes=CHECK-NO_NEON %s
23
; RUN: llc -mtriple=arm-eabihf -mattr=+neon,+neonfp < %s | FileCheck -check-prefixes=CHECK-NEON %s
34

45
define float @max_f32(float, float) {
5-
;CHECK-NEON: vmax.f32
6-
;CHECK-NO_NEON: vcmp.f32
7-
;CHECK-NO_NEON: vmrs
8-
;CHECK-NO_NEON: vmovgt.f32
6+
; CHECK-NO_NEON-LABEL: max_f32:
7+
; CHECK-NO_NEON: @ %bb.0:
8+
; CHECK-NO_NEON-NEXT: vcmp.f32 s1, s0
9+
; CHECK-NO_NEON-NEXT: vmrs APSR_nzcv, fpscr
10+
; CHECK-NO_NEON-NEXT: vmovgt.f32 s0, s1
11+
; CHECK-NO_NEON-NEXT: mov pc, lr
12+
;
13+
; CHECK-NEON-LABEL: max_f32:
14+
; CHECK-NEON: @ %bb.0:
15+
; CHECK-NEON-NEXT: vcmp.f32 s1, s0
16+
; CHECK-NEON-NEXT: vmrs APSR_nzcv, fpscr
17+
; CHECK-NEON-NEXT: vmovgt.f32 s0, s1
18+
; CHECK-NEON-NEXT: mov pc, lr
919
%3 = call nnan float @llvm.maxnum.f32(float %1, float %0)
1020
ret float %3
1121
}
1222

1323
declare float @llvm.maxnum.f32(float, float) #1
1424

1525
define float @min_f32(float, float) {
16-
;CHECK-NEON: vmin.f32
17-
;CHECK-NO_NEON: vcmp.f32
18-
;CHECK-NO_NEON: vmrs
19-
;CHECK-NO_NEON: vmovlt.f32
26+
; CHECK-NO_NEON-LABEL: min_f32:
27+
; CHECK-NO_NEON: @ %bb.0:
28+
; CHECK-NO_NEON-NEXT: vcmp.f32 s1, s0
29+
; CHECK-NO_NEON-NEXT: vmrs APSR_nzcv, fpscr
30+
; CHECK-NO_NEON-NEXT: vmovlt.f32 s0, s1
31+
; CHECK-NO_NEON-NEXT: mov pc, lr
32+
;
33+
; CHECK-NEON-LABEL: min_f32:
34+
; CHECK-NEON: @ %bb.0:
35+
; CHECK-NEON-NEXT: vcmp.f32 s1, s0
36+
; CHECK-NEON-NEXT: vmrs APSR_nzcv, fpscr
37+
; CHECK-NEON-NEXT: vmovlt.f32 s0, s1
38+
; CHECK-NEON-NEXT: mov pc, lr
2039
%3 = call nnan float @llvm.minnum.f32(float %1, float %0)
2140
ret float %3
2241
}

llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll

Lines changed: 105 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -502,13 +502,24 @@ define double @fmaxnum64_non_zero_intrinsic(double %x) {
502502
define <4 x float> @fminnumv432_intrinsic(<4 x float> %x, <4 x float> %y) {
503503
; ARMV7-LABEL: fminnumv432_intrinsic:
504504
; ARMV7: @ %bb.0:
505-
; ARMV7-NEXT: vmov d17, r2, r3
506-
; ARMV7-NEXT: vmov d16, r0, r1
507-
; ARMV7-NEXT: mov r0, sp
508-
; ARMV7-NEXT: vld1.64 {d18, d19}, [r0]
509-
; ARMV7-NEXT: vmin.f32 q8, q8, q9
510-
; ARMV7-NEXT: vmov r0, r1, d16
511-
; ARMV7-NEXT: vmov r2, r3, d17
505+
; ARMV7-NEXT: mov r12, sp
506+
; ARMV7-NEXT: vld1.64 {d0, d1}, [r12]
507+
; ARMV7-NEXT: vmov d3, r2, r3
508+
; ARMV7-NEXT: vmov d2, r0, r1
509+
; ARMV7-NEXT: vcmp.f32 s7, s3
510+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
511+
; ARMV7-NEXT: vcmp.f32 s6, s2
512+
; ARMV7-NEXT: vmovlt.f32 s3, s7
513+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
514+
; ARMV7-NEXT: vcmp.f32 s5, s1
515+
; ARMV7-NEXT: vmovlt.f32 s2, s6
516+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
517+
; ARMV7-NEXT: vcmp.f32 s4, s0
518+
; ARMV7-NEXT: vmovlt.f32 s1, s5
519+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
520+
; ARMV7-NEXT: vmovlt.f32 s0, s4
521+
; ARMV7-NEXT: vmov r2, r3, d1
522+
; ARMV7-NEXT: vmov r0, r1, d0
512523
; ARMV7-NEXT: bx lr
513524
;
514525
; ARMV8-LABEL: fminnumv432_intrinsic:
@@ -635,21 +646,31 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
635646
define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
636647
; ARMV7-LABEL: fminnumv432_one_zero_intrinsic:
637648
; ARMV7: @ %bb.0:
638-
; ARMV7-NEXT: vmov d17, r2, r3
639-
; ARMV7-NEXT: vmov d16, r0, r1
640-
; ARMV7-NEXT: adr r0, .LCPI18_0
641-
; ARMV7-NEXT: vld1.64 {d18, d19}, [r0:128]
642-
; ARMV7-NEXT: vmin.f32 q8, q8, q9
643-
; ARMV7-NEXT: vmov r0, r1, d16
644-
; ARMV7-NEXT: vmov r2, r3, d17
649+
; ARMV7-NEXT: vmov d3, r2, r3
650+
; ARMV7-NEXT: vmov d2, r0, r1
651+
; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00
652+
; ARMV7-NEXT: vcmp.f32 s5, #0
653+
; ARMV7-NEXT: vldr s1, .LCPI18_0
654+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
655+
; ARMV7-NEXT: vcmp.f32 s7, s0
656+
; ARMV7-NEXT: vmovlt.f32 s1, s5
657+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
658+
; ARMV7-NEXT: vmov.f32 s3, s0
659+
; ARMV7-NEXT: vcmp.f32 s6, s0
660+
; ARMV7-NEXT: vmovlt.f32 s3, s7
661+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
662+
; ARMV7-NEXT: vmov.f32 s2, s0
663+
; ARMV7-NEXT: vcmp.f32 s4, s0
664+
; ARMV7-NEXT: vmovlt.f32 s2, s6
665+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
666+
; ARMV7-NEXT: vmovlt.f32 s0, s4
667+
; ARMV7-NEXT: vmov r2, r3, d1
668+
; ARMV7-NEXT: vmov r0, r1, d0
645669
; ARMV7-NEXT: bx lr
646-
; ARMV7-NEXT: .p2align 4
670+
; ARMV7-NEXT: .p2align 2
647671
; ARMV7-NEXT: @ %bb.1:
648672
; ARMV7-NEXT: .LCPI18_0:
649-
; ARMV7-NEXT: .long 0xbf800000 @ float -1
650673
; ARMV7-NEXT: .long 0x00000000 @ float 0
651-
; ARMV7-NEXT: .long 0xbf800000 @ float -1
652-
; ARMV7-NEXT: .long 0xbf800000 @ float -1
653674
;
654675
; ARMV8-LABEL: fminnumv432_one_zero_intrinsic:
655676
; ARMV8: @ %bb.0:
@@ -697,13 +718,24 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
697718
define <4 x float> @fmaxnumv432_intrinsic(<4 x float> %x, <4 x float> %y) {
698719
; ARMV7-LABEL: fmaxnumv432_intrinsic:
699720
; ARMV7: @ %bb.0:
700-
; ARMV7-NEXT: vmov d17, r2, r3
701-
; ARMV7-NEXT: vmov d16, r0, r1
702-
; ARMV7-NEXT: mov r0, sp
703-
; ARMV7-NEXT: vld1.64 {d18, d19}, [r0]
704-
; ARMV7-NEXT: vmax.f32 q8, q8, q9
705-
; ARMV7-NEXT: vmov r0, r1, d16
706-
; ARMV7-NEXT: vmov r2, r3, d17
721+
; ARMV7-NEXT: mov r12, sp
722+
; ARMV7-NEXT: vld1.64 {d0, d1}, [r12]
723+
; ARMV7-NEXT: vmov d3, r2, r3
724+
; ARMV7-NEXT: vmov d2, r0, r1
725+
; ARMV7-NEXT: vcmp.f32 s7, s3
726+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
727+
; ARMV7-NEXT: vcmp.f32 s6, s2
728+
; ARMV7-NEXT: vmovgt.f32 s3, s7
729+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
730+
; ARMV7-NEXT: vcmp.f32 s5, s1
731+
; ARMV7-NEXT: vmovgt.f32 s2, s6
732+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
733+
; ARMV7-NEXT: vcmp.f32 s4, s0
734+
; ARMV7-NEXT: vmovgt.f32 s1, s5
735+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
736+
; ARMV7-NEXT: vmovgt.f32 s0, s4
737+
; ARMV7-NEXT: vmov r2, r3, d1
738+
; ARMV7-NEXT: vmov r0, r1, d0
707739
; ARMV7-NEXT: bx lr
708740
;
709741
; ARMV8-LABEL: fmaxnumv432_intrinsic:
@@ -789,13 +821,31 @@ define <4 x float> @fmaxnumv432_nsz_intrinsic(<4 x float> %x, <4 x float> %y) {
789821
define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) {
790822
; ARMV7-LABEL: fmaxnumv432_zero_intrinsic:
791823
; ARMV7: @ %bb.0:
792-
; ARMV7-NEXT: vmov d19, r2, r3
793-
; ARMV7-NEXT: vmov.i32 q8, #0x0
794-
; ARMV7-NEXT: vmov d18, r0, r1
795-
; ARMV7-NEXT: vmax.f32 q8, q9, q8
796-
; ARMV7-NEXT: vmov r0, r1, d16
797-
; ARMV7-NEXT: vmov r2, r3, d17
824+
; ARMV7-NEXT: vmov d3, r2, r3
825+
; ARMV7-NEXT: vldr s0, .LCPI21_0
826+
; ARMV7-NEXT: vmov d2, r0, r1
827+
; ARMV7-NEXT: vcmp.f32 s7, #0
828+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
829+
; ARMV7-NEXT: vmov.f32 s3, s0
830+
; ARMV7-NEXT: vcmp.f32 s6, #0
831+
; ARMV7-NEXT: vmovgt.f32 s3, s7
832+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
833+
; ARMV7-NEXT: vmov.f32 s2, s0
834+
; ARMV7-NEXT: vcmp.f32 s5, #0
835+
; ARMV7-NEXT: vmovgt.f32 s2, s6
836+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
837+
; ARMV7-NEXT: vmov.f32 s1, s0
838+
; ARMV7-NEXT: vcmp.f32 s4, #0
839+
; ARMV7-NEXT: vmovgt.f32 s1, s5
840+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
841+
; ARMV7-NEXT: vmovgt.f32 s0, s4
842+
; ARMV7-NEXT: vmov r2, r3, d1
843+
; ARMV7-NEXT: vmov r0, r1, d0
798844
; ARMV7-NEXT: bx lr
845+
; ARMV7-NEXT: .p2align 2
846+
; ARMV7-NEXT: @ %bb.1:
847+
; ARMV7-NEXT: .LCPI21_0:
848+
; ARMV7-NEXT: .long 0x00000000 @ float 0
799849
;
800850
; ARMV8-LABEL: fmaxnumv432_zero_intrinsic:
801851
; ARMV8: @ %bb.0:
@@ -834,13 +884,31 @@ define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) {
834884
define <4 x float> @fmaxnumv432_minus_zero_intrinsic(<4 x float> %x) {
835885
; ARMV7-LABEL: fmaxnumv432_minus_zero_intrinsic:
836886
; ARMV7: @ %bb.0:
837-
; ARMV7-NEXT: vmov d19, r2, r3
838-
; ARMV7-NEXT: vmov.i32 q8, #0x80000000
839-
; ARMV7-NEXT: vmov d18, r0, r1
840-
; ARMV7-NEXT: vmax.f32 q8, q9, q8
841-
; ARMV7-NEXT: vmov r0, r1, d16
842-
; ARMV7-NEXT: vmov r2, r3, d17
887+
; ARMV7-NEXT: vldr s0, .LCPI22_0
888+
; ARMV7-NEXT: vmov d3, r2, r3
889+
; ARMV7-NEXT: vmov d2, r0, r1
890+
; ARMV7-NEXT: vcmp.f32 s7, s0
891+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
892+
; ARMV7-NEXT: vmov.f32 s3, s0
893+
; ARMV7-NEXT: vcmp.f32 s6, s0
894+
; ARMV7-NEXT: vmovgt.f32 s3, s7
895+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
896+
; ARMV7-NEXT: vmov.f32 s2, s0
897+
; ARMV7-NEXT: vcmp.f32 s5, s0
898+
; ARMV7-NEXT: vmovgt.f32 s2, s6
899+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
900+
; ARMV7-NEXT: vmov.f32 s1, s0
901+
; ARMV7-NEXT: vcmp.f32 s4, s0
902+
; ARMV7-NEXT: vmovgt.f32 s1, s5
903+
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
904+
; ARMV7-NEXT: vmovgt.f32 s0, s4
905+
; ARMV7-NEXT: vmov r2, r3, d1
906+
; ARMV7-NEXT: vmov r0, r1, d0
843907
; ARMV7-NEXT: bx lr
908+
; ARMV7-NEXT: .p2align 2
909+
; ARMV7-NEXT: @ %bb.1:
910+
; ARMV7-NEXT: .LCPI22_0:
911+
; ARMV7-NEXT: .long 0x80000000 @ float -0
844912
;
845913
; ARMV8-LABEL: fmaxnumv432_minus_zero_intrinsic:
846914
; ARMV8: @ %bb.0:

llvm/test/CodeGen/WebAssembly/f32.ll

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,13 @@ define float @fminnum32_intrinsic(float %x, float %y) {
217217
; CHECK-LABEL: fminnum32_intrinsic:
218218
; CHECK: .functype fminnum32_intrinsic (f32, f32) -> (f32)
219219
; CHECK-NEXT: # %bb.0:
220-
; CHECK-NEXT: local.get $push2=, 0
221-
; CHECK-NEXT: local.get $push1=, 1
222-
; CHECK-NEXT: f32.min $push0=, $pop2, $pop1
223-
; CHECK-NEXT: return $pop0
220+
; CHECK-NEXT: local.get $push5=, 0
221+
; CHECK-NEXT: local.get $push4=, 1
222+
; CHECK-NEXT: local.get $push3=, 0
223+
; CHECK-NEXT: local.get $push2=, 1
224+
; CHECK-NEXT: f32.lt $push0=, $pop3, $pop2
225+
; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0
226+
; CHECK-NEXT: return $pop1
224227
%a = call nnan float @llvm.minnum.f32(float %x, float %y)
225228
ret float %a
226229
}
@@ -267,10 +270,13 @@ define float @fmaxnum32_intrinsic(float %x, float %y) {
267270
; CHECK-LABEL: fmaxnum32_intrinsic:
268271
; CHECK: .functype fmaxnum32_intrinsic (f32, f32) -> (f32)
269272
; CHECK-NEXT: # %bb.0:
270-
; CHECK-NEXT: local.get $push2=, 0
271-
; CHECK-NEXT: local.get $push1=, 1
272-
; CHECK-NEXT: f32.max $push0=, $pop2, $pop1
273-
; CHECK-NEXT: return $pop0
273+
; CHECK-NEXT: local.get $push5=, 0
274+
; CHECK-NEXT: local.get $push4=, 1
275+
; CHECK-NEXT: local.get $push3=, 0
276+
; CHECK-NEXT: local.get $push2=, 1
277+
; CHECK-NEXT: f32.gt $push0=, $pop3, $pop2
278+
; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0
279+
; CHECK-NEXT: return $pop1
274280
%a = call nnan float @llvm.maxnum.f32(float %x, float %y)
275281
ret float %a
276282
}
@@ -291,10 +297,13 @@ define float @fmaxnum32_zero_intrinsic(float %x) {
291297
; CHECK-LABEL: fmaxnum32_zero_intrinsic:
292298
; CHECK: .functype fmaxnum32_zero_intrinsic (f32) -> (f32)
293299
; CHECK-NEXT: # %bb.0:
294-
; CHECK-NEXT: local.get $push2=, 0
300+
; CHECK-NEXT: local.get $push5=, 0
295301
; CHECK-NEXT: f32.const $push0=, 0x0p0
296-
; CHECK-NEXT: f32.max $push1=, $pop2, $pop0
297-
; CHECK-NEXT: return $pop1
302+
; CHECK-NEXT: local.get $push4=, 0
303+
; CHECK-NEXT: f32.const $push3=, 0x0p0
304+
; CHECK-NEXT: f32.gt $push1=, $pop4, $pop3
305+
; CHECK-NEXT: f32.select $push2=, $pop5, $pop0, $pop1
306+
; CHECK-NEXT: return $pop2
298307
%a = call nnan float @llvm.maxnum.f32(float %x, float 0.0)
299308
ret float %a
300309
}

0 commit comments

Comments
 (0)