Skip to content

Commit d642eec

Browse files
[HEXAGON] Fix semantics of ordered FP compares (llvm#131089)
For the ordered FP compare bitcode instructions, the Hexagon backend was assuming that no operand could be a NaN. This assumption is flawed. This patch fixes the code-generation to produce fpcmp.uo and appropriate bit comparison operators to account for the case when an operand to a FP compare is a NaN. Fix for llvm#129391 Co-authored-by: aankit-quic <[email protected]>
1 parent 64f67f8 commit d642eec

File tree

2 files changed

+213
-14
lines changed

2 files changed

+213
-14
lines changed

llvm/lib/Target/Hexagon/HexagonPatterns.td

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -721,23 +721,13 @@ def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
721721
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
722722
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
723723
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
724-
def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
725-
def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
726-
def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
727-
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
728-
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
729724
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
730725
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
731726
def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
732727

733728
def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
734729
def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
735730
def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
736-
def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
737-
def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
738-
def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
739-
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
740-
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
741731
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
742732
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
743733
def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
@@ -900,15 +890,35 @@ def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
900890
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
901891
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
902892

903-
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
904-
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
893+
class T4<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3, InstHexagon MI4>
894+
: OutPatFrag<(ops node:$Rs, node:$Rt),
895+
(MI1 (MI2 (MI3 $Rs, $Rt), (MI4 $Rs, $Rt)))>;
905896

906-
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
907-
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
897+
class Cmpof<InstHexagon MI>: T3<C2_andn, MI, F2_sfcmpuo>;
898+
class Cmpod<InstHexagon MI>: T3<C2_andn, MI, F2_dfcmpuo>;
899+
900+
class Cmpofn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_sfcmpuo>;
901+
class Cmpodn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_dfcmpuo>;
902+
903+
def: OpmR_RR_pat<Cmpof<F2_sfcmpeq>, setoeq, i1, F32>;
904+
def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, setoge, i1, F32>;
905+
def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, setogt, i1, F32>;
906+
def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, RevCmp<setole>, i1, F32>;
907+
def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, RevCmp<setolt>, i1, F32>;
908+
def: OpmR_RR_pat<Cmpofn<F2_sfcmpeq>, setone, i1, F32>;
909+
910+
def: OpmR_RR_pat<Cmpod<F2_dfcmpeq>, setoeq, i1, F64>;
911+
def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, setoge, i1, F64>;
912+
def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, setogt, i1, F64>;
913+
def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, RevCmp<setole>, i1, F64>;
914+
def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, RevCmp<setolt>, i1, F64>;
915+
def: OpmR_RR_pat<Cmpodn<F2_dfcmpeq>, setone, i1, F64>;
908916

909917
def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
910918
def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
911919

920+
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
921+
def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
912922

913923
// --(6) Select ----------------------------------------------------------
914924
//

llvm/test/CodeGen/Hexagon/fcmp-nan.ll

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; RUN: llc -march=hexagon < %s | FileCheck %s
2+
;
3+
; Test that all FP ordered compare instructions generate the correct
4+
; post-processing to accommodate NaNs.
5+
;
6+
; Specifically for ordered FP compares, we have to check if one of
7+
the operands was a NaN to conform to the semantics of the ordered
8+
; fcmp bitcode instruction
9+
;
10+
target triple = "hexagon"
11+
12+
;
13+
; Functions for float:
14+
;
15+
16+
;
17+
; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
18+
; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
19+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
20+
; CHECK: r0 = mux([[REG2]],#1,#0)
21+
;
22+
define i32 @compare_oeq_f(float %val, float %val2) local_unnamed_addr #0 {
23+
entry:
24+
%cmpinf = fcmp oeq float %val, %val2
25+
%0 = zext i1 %cmpinf to i32
26+
ret i32 %0
27+
}
28+
29+
30+
;
31+
; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
32+
; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
33+
; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
34+
; CHECK: r0 = mux([[REG2]],#0,#1)
35+
;
36+
define i32 @compare_one_f(float %val, float %val2) local_unnamed_addr #0 {
37+
entry:
38+
%cmpinf = fcmp one float %val, %val2
39+
%0 = zext i1 %cmpinf to i32
40+
ret i32 %0
41+
}
42+
43+
44+
;
45+
; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r0,r1)
46+
; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
47+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
48+
; CHECK: r0 = mux([[REG2]],#1,#0)
49+
;
50+
define i32 @compare_ogt_f(float %val, float %val2) local_unnamed_addr #0 {
51+
entry:
52+
%cmpinf = fcmp ogt float %val, %val2
53+
%0 = zext i1 %cmpinf to i32
54+
ret i32 %0
55+
}
56+
57+
58+
;
59+
; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r1,r0)
60+
; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
61+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
62+
; CHECK: r0 = mux([[REG2]],#1,#0)
63+
;
64+
define i32 @compare_ole_f(float %val, float %val2) local_unnamed_addr #0 {
65+
entry:
66+
%cmpinf = fcmp ole float %val, %val2
67+
%0 = zext i1 %cmpinf to i32
68+
ret i32 %0
69+
}
70+
71+
72+
73+
;
74+
; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r0,r1)
75+
; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
76+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
77+
; CHECK: r0 = mux([[REG2]],#1,#0)
78+
;
79+
define i32 @compare_oge_f(float %val, float %val2) local_unnamed_addr #0 {
80+
entry:
81+
%cmpinf = fcmp oge float %val, %val2
82+
%0 = zext i1 %cmpinf to i32
83+
ret i32 %0
84+
}
85+
86+
87+
;
88+
; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r1,r0)
89+
; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
90+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
91+
; CHECK: r0 = mux([[REG2]],#1,#0)
92+
;
93+
define i32 @compare_olt_f(float %val, float %val2) local_unnamed_addr #0 {
94+
entry:
95+
%cmpinf = fcmp olt float %val, %val2
96+
%0 = zext i1 %cmpinf to i32
97+
ret i32 %0
98+
}
99+
100+
101+
102+
;
103+
; Functions for double:
104+
;
105+
106+
;
107+
; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
108+
; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
109+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
110+
; CHECK: r0 = mux([[REG2]],#1,#0)
111+
;
112+
define i32 @compare_oeq_d(double %val, double %val2) local_unnamed_addr #0 {
113+
entry:
114+
%cmpinf = fcmp oeq double %val, %val2
115+
%0 = zext i1 %cmpinf to i32
116+
ret i32 %0
117+
}
118+
119+
120+
;
121+
; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
122+
; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
123+
; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
124+
; CHECK: r0 = mux([[REG2]],#0,#1)
125+
;
126+
define i32 @compare_one_d(double %val, double %val2) local_unnamed_addr #0 {
127+
entry:
128+
%cmpinf = fcmp one double %val, %val2
129+
%0 = zext i1 %cmpinf to i32
130+
ret i32 %0
131+
}
132+
133+
134+
135+
;
136+
; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r1:0,r3:2)
137+
; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
138+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
139+
; CHECK: r0 = mux([[REG2]],#1,#0)
140+
;
141+
define i32 @compare_ogt_d(double %val, double %val2) local_unnamed_addr #0 {
142+
entry:
143+
%cmpinf = fcmp ogt double %val, %val2
144+
%0 = zext i1 %cmpinf to i32
145+
ret i32 %0
146+
}
147+
148+
149+
;
150+
; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r3:2,r1:0)
151+
; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
152+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
153+
; CHECK: r0 = mux([[REG2]],#1,#0)
154+
;
155+
define i32 @compare_ole_d(double %val, double %val2) local_unnamed_addr #0 {
156+
entry:
157+
%cmpinf = fcmp ole double %val, %val2
158+
%0 = zext i1 %cmpinf to i32
159+
ret i32 %0
160+
}
161+
162+
163+
;
164+
; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r1:0,r3:2)
165+
; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
166+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
167+
; CHECK: r0 = mux([[REG2]],#1,#0)
168+
;
169+
define i32 @compare_oge_d(double %val, double %val2) local_unnamed_addr #0 {
170+
entry:
171+
%cmpinf = fcmp oge double %val, %val2
172+
%0 = zext i1 %cmpinf to i32
173+
ret i32 %0
174+
}
175+
176+
177+
;
178+
; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r3:2,r1:0)
179+
; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
180+
; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
181+
; CHECK: r0 = mux([[REG2]],#1,#0)
182+
;
183+
define i32 @compare_olt_d(double %val, double %val2) local_unnamed_addr #0 {
184+
entry:
185+
%cmpinf = fcmp olt double %val, %val2
186+
%0 = zext i1 %cmpinf to i32
187+
ret i32 %0
188+
}
189+

0 commit comments

Comments
 (0)