Skip to content

Commit f178e51

Browse files
authored
[SDAG] Add missing ppc_fp128 ExpandFloatRes legalization for modf (#127895)
Should fix the buildbot failure at https://lab.llvm.org/buildbot/#/builders/72/builds/8380 (`test_modf_ppcf128` is the test case that needed the additional legalization).
1 parent a16fa3a commit f178e51

File tree

3 files changed

+352
-0
lines changed

3 files changed

+352
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
15691569
case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
15701570
case ISD::STRICT_FREM:
15711571
case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break;
1572+
case ISD::FMODF: ExpandFloatRes_FMODF(N); break;
15721573
// clang-format on
15731574
}
15741575

@@ -1619,6 +1620,23 @@ void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
16191620
GetPairElements(Tmp.first, Lo, Hi);
16201621
}
16211622

1623+
/// Result expansion for ISD::FMODF, reached from the ExpandFloatResult switch.
///
/// Forwards to ExpandFloatRes_UnaryWithTwoFPResults, passing the library call
/// that RTLIB::getMODF selects for the type of the node's result 0. Unlike the
/// neighbouring ExpandFloatRes_* handlers it takes no Lo/Hi out-parameters.
///
/// \param N The FMODF node whose results are being expanded.
void DAGTypeLegalizer::ExpandFloatRes_FMODF(SDNode *N) {
1624+
// CallRetResNo is 0 here: presumably result 0 is the value the libcall
// returns directly while the other result is written through a pointer
// argument -- confirm against expandMultipleResultFPLibCall.
ExpandFloatRes_UnaryWithTwoFPResults(N, RTLIB::getMODF(N->getValueType(0)),
1625+
/*CallRetResNo=*/0);
1626+
}
1627+
1628+
/// Expands the results of a node that is lowered through
/// DAG.expandMultipleResultFPLibCall.
///
/// Each value that call places in Results is split into a Lo/Hi pair with
/// GetPairElements and recorded with SetExpandedFloat against the node result
/// that has the same index.
///
/// \param N            Node being expanded; asserted not to be a strict-FP
///                     opcode.
/// \param LC           Library call passed to expandMultipleResultFPLibCall.
/// \param CallRetResNo Optional result index, forwarded unchanged to
///                     expandMultipleResultFPLibCall.
void DAGTypeLegalizer::ExpandFloatRes_UnaryWithTwoFPResults(
1629+
SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo) {
1630+
// Strict-FP variants are rejected up front; this path does not handle them.
assert(!N->isStrictFPOpcode() && "strictfp not implemented");
1631+
SmallVector<SDValue> Results;
1632+
// NOTE(review): if expandMultipleResultFPLibCall reports failure through a
// return value, that is not checked here -- confirm this is intentional.
DAG.expandMultipleResultFPLibCall(LC, N, Results, CallRetResNo);
1633+
// The position of a value in Results is used as the result number of N.
for (auto [ResNo, Res] : enumerate(Results)) {
1634+
SDValue Lo, Hi;
1635+
GetPairElements(Res, Lo, Hi);
1636+
SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
1637+
}
1638+
}
1639+
16221640
void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
16231641
SDValue &Hi) {
16241642
assert(N->getValueType(0) == MVT::ppcf128 &&

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
668668
SDValue &Lo, SDValue &Hi);
669669
void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC,
670670
SDValue &Lo, SDValue &Hi);
671+
void ExpandFloatRes_UnaryWithTwoFPResults(
672+
SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
673+
671674
// clang-format off
672675
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
673676
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -714,6 +717,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
714717
void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi);
715718
void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
716719
void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi);
720+
void ExpandFloatRes_FMODF(SDNode *N);
717721
// clang-format on
718722

719723
// Float Operand Expansion.
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
3+
; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
4+
5+
define { half, half } @test_modf_f16(half %a) {
6+
; CHECK-LABEL: test_modf_f16:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: mflr r0
9+
; CHECK-NEXT: stdu r1, -48(r1)
10+
; CHECK-NEXT: std r0, 64(r1)
11+
; CHECK-NEXT: .cfi_def_cfa_offset 48
12+
; CHECK-NEXT: .cfi_offset lr, 16
13+
; CHECK-NEXT: xscvdphp f0, f1
14+
; CHECK-NEXT: addi r4, r1, 44
15+
; CHECK-NEXT: mffprwz r3, f0
16+
; CHECK-NEXT: clrlwi r3, r3, 16
17+
; CHECK-NEXT: mtfprwz f0, r3
18+
; CHECK-NEXT: xscvhpdp f1, f0
19+
; CHECK-NEXT: bl modff
20+
; CHECK-NEXT: nop
21+
; CHECK-NEXT: lfs f2, 44(r1)
22+
; CHECK-NEXT: addi r1, r1, 48
23+
; CHECK-NEXT: ld r0, 16(r1)
24+
; CHECK-NEXT: mtlr r0
25+
; CHECK-NEXT: blr
26+
%result = call { half, half } @llvm.modf.f16(half %a)
27+
ret { half, half } %result
28+
}
29+
30+
define half @test_modf_f16_only_use_fractional_part(half %a) {
31+
; CHECK-LABEL: test_modf_f16_only_use_fractional_part:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: mflr r0
34+
; CHECK-NEXT: stdu r1, -48(r1)
35+
; CHECK-NEXT: std r0, 64(r1)
36+
; CHECK-NEXT: .cfi_def_cfa_offset 48
37+
; CHECK-NEXT: .cfi_offset lr, 16
38+
; CHECK-NEXT: xscvdphp f0, f1
39+
; CHECK-NEXT: addi r4, r1, 44
40+
; CHECK-NEXT: mffprwz r3, f0
41+
; CHECK-NEXT: clrlwi r3, r3, 16
42+
; CHECK-NEXT: mtfprwz f0, r3
43+
; CHECK-NEXT: xscvhpdp f1, f0
44+
; CHECK-NEXT: bl modff
45+
; CHECK-NEXT: nop
46+
; CHECK-NEXT: addi r1, r1, 48
47+
; CHECK-NEXT: ld r0, 16(r1)
48+
; CHECK-NEXT: mtlr r0
49+
; CHECK-NEXT: blr
50+
%result = call { half, half } @llvm.modf.f16(half %a)
51+
%result.0 = extractvalue { half, half } %result, 0
52+
ret half %result.0
53+
}
54+
55+
define half @test_modf_f16_only_use_integral_part(half %a) {
56+
; CHECK-LABEL: test_modf_f16_only_use_integral_part:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: mflr r0
59+
; CHECK-NEXT: stdu r1, -48(r1)
60+
; CHECK-NEXT: std r0, 64(r1)
61+
; CHECK-NEXT: .cfi_def_cfa_offset 48
62+
; CHECK-NEXT: .cfi_offset lr, 16
63+
; CHECK-NEXT: xscvdphp f0, f1
64+
; CHECK-NEXT: addi r4, r1, 44
65+
; CHECK-NEXT: mffprwz r3, f0
66+
; CHECK-NEXT: clrlwi r3, r3, 16
67+
; CHECK-NEXT: mtfprwz f0, r3
68+
; CHECK-NEXT: xscvhpdp f1, f0
69+
; CHECK-NEXT: bl modff
70+
; CHECK-NEXT: nop
71+
; CHECK-NEXT: lfs f1, 44(r1)
72+
; CHECK-NEXT: addi r1, r1, 48
73+
; CHECK-NEXT: ld r0, 16(r1)
74+
; CHECK-NEXT: mtlr r0
75+
; CHECK-NEXT: blr
76+
%result = call { half, half } @llvm.modf.f16(half %a)
77+
%result.1 = extractvalue { half, half } %result, 1
78+
ret half %result.1
79+
}
80+
81+
define { <2 x half>, <2 x half> } @test_modf_v2f16(<2 x half> %a) {
82+
; CHECK-LABEL: test_modf_v2f16:
83+
; CHECK: # %bb.0:
84+
; CHECK-NEXT: mflr r0
85+
; CHECK-NEXT: .cfi_def_cfa_offset 64
86+
; CHECK-NEXT: .cfi_offset lr, 16
87+
; CHECK-NEXT: .cfi_offset f30, -16
88+
; CHECK-NEXT: .cfi_offset f31, -8
89+
; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill
90+
; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
91+
; CHECK-NEXT: stdu r1, -64(r1)
92+
; CHECK-NEXT: std r0, 80(r1)
93+
; CHECK-NEXT: xscvdphp f0, f2
94+
; CHECK-NEXT: addi r4, r1, 40
95+
; CHECK-NEXT: mffprwz r3, f0
96+
; CHECK-NEXT: clrlwi r3, r3, 16
97+
; CHECK-NEXT: mtfprwz f0, r3
98+
; CHECK-NEXT: xscvhpdp f31, f0
99+
; CHECK-NEXT: xscvdphp f0, f1
100+
; CHECK-NEXT: mffprwz r3, f0
101+
; CHECK-NEXT: clrlwi r3, r3, 16
102+
; CHECK-NEXT: mtfprwz f0, r3
103+
; CHECK-NEXT: xscvhpdp f1, f0
104+
; CHECK-NEXT: bl modff
105+
; CHECK-NEXT: nop
106+
; CHECK-NEXT: addi r4, r1, 44
107+
; CHECK-NEXT: fmr f30, f1
108+
; CHECK-NEXT: fmr f1, f31
109+
; CHECK-NEXT: bl modff
110+
; CHECK-NEXT: nop
111+
; CHECK-NEXT: lfs f3, 40(r1)
112+
; CHECK-NEXT: fmr f2, f1
113+
; CHECK-NEXT: fmr f1, f30
114+
; CHECK-NEXT: lfs f4, 44(r1)
115+
; CHECK-NEXT: addi r1, r1, 64
116+
; CHECK-NEXT: ld r0, 16(r1)
117+
; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload
118+
; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload
119+
; CHECK-NEXT: mtlr r0
120+
; CHECK-NEXT: blr
121+
%result = call { <2 x half>, <2 x half> } @llvm.modf.v2f16(<2 x half> %a)
122+
ret { <2 x half>, <2 x half> } %result
123+
}
124+
125+
define { float, float } @test_modf_f32(float %a) {
126+
; CHECK-LABEL: test_modf_f32:
127+
; CHECK: # %bb.0:
128+
; CHECK-NEXT: mflr r0
129+
; CHECK-NEXT: stdu r1, -48(r1)
130+
; CHECK-NEXT: std r0, 64(r1)
131+
; CHECK-NEXT: .cfi_def_cfa_offset 48
132+
; CHECK-NEXT: .cfi_offset lr, 16
133+
; CHECK-NEXT: addi r4, r1, 44
134+
; CHECK-NEXT: bl modff
135+
; CHECK-NEXT: nop
136+
; CHECK-NEXT: lfs f2, 44(r1)
137+
; CHECK-NEXT: addi r1, r1, 48
138+
; CHECK-NEXT: ld r0, 16(r1)
139+
; CHECK-NEXT: mtlr r0
140+
; CHECK-NEXT: blr
141+
%result = call { float, float } @llvm.modf.f32(float %a)
142+
ret { float, float } %result
143+
}
144+
145+
define { <3 x float>, <3 x float> } @test_modf_v3f32(<3 x float> %a) {
146+
; CHECK-LABEL: test_modf_v3f32:
147+
; CHECK: # %bb.0:
148+
; CHECK-NEXT: mflr r0
149+
; CHECK-NEXT: stdu r1, -112(r1)
150+
; CHECK-NEXT: std r0, 128(r1)
151+
; CHECK-NEXT: .cfi_def_cfa_offset 112
152+
; CHECK-NEXT: .cfi_offset lr, 16
153+
; CHECK-NEXT: .cfi_offset r28, -32
154+
; CHECK-NEXT: .cfi_offset r29, -24
155+
; CHECK-NEXT: .cfi_offset r30, -16
156+
; CHECK-NEXT: .cfi_offset v30, -64
157+
; CHECK-NEXT: .cfi_offset v31, -48
158+
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
159+
; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
160+
; CHECK-NEXT: addi r30, r1, 36
161+
; CHECK-NEXT: std r28, 80(r1) # 8-byte Folded Spill
162+
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
163+
; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
164+
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
165+
; CHECK-NEXT: mr r4, r30
166+
; CHECK-NEXT: vmr v31, v2
167+
; CHECK-NEXT: xscvspdpn f1, vs0
168+
; CHECK-NEXT: bl modff
169+
; CHECK-NEXT: nop
170+
; CHECK-NEXT: xxswapd vs0, v31
171+
; CHECK-NEXT: addi r29, r1, 40
172+
; CHECK-NEXT: xscvdpspn v30, f1
173+
; CHECK-NEXT: mr r4, r29
174+
; CHECK-NEXT: xscvspdpn f1, vs0
175+
; CHECK-NEXT: bl modff
176+
; CHECK-NEXT: nop
177+
; CHECK-NEXT: xscvdpspn vs0, f1
178+
; CHECK-NEXT: addi r28, r1, 44
179+
; CHECK-NEXT: mr r4, r28
180+
; CHECK-NEXT: xxmrghw v30, vs0, v30
181+
; CHECK-NEXT: xxsldwi vs0, v31, v31, 1
182+
; CHECK-NEXT: xscvspdpn f1, vs0
183+
; CHECK-NEXT: bl modff
184+
; CHECK-NEXT: nop
185+
; CHECK-NEXT: addis r3, r2, .LCPI5_0@toc@ha
186+
; CHECK-NEXT: xscvdpspn v2, f1
187+
; CHECK-NEXT: lfiwzx f1, 0, r30
188+
; CHECK-NEXT: lfiwzx f2, 0, r29
189+
; CHECK-NEXT: lxsiwzx v3, 0, r28
190+
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
191+
; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
192+
; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
193+
; CHECK-NEXT: ld r28, 80(r1) # 8-byte Folded Reload
194+
; CHECK-NEXT: addi r3, r3, .LCPI5_0@toc@l
195+
; CHECK-NEXT: lxv vs0, 0(r3)
196+
; CHECK-NEXT: xxmrghw v4, vs2, vs1
197+
; CHECK-NEXT: xxperm v2, v30, vs0
198+
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
199+
; CHECK-NEXT: xxperm v3, v4, vs0
200+
; CHECK-NEXT: addi r1, r1, 112
201+
; CHECK-NEXT: ld r0, 16(r1)
202+
; CHECK-NEXT: mtlr r0
203+
; CHECK-NEXT: blr
204+
%result = call { <3 x float>, <3 x float> } @llvm.modf.v3f32(<3 x float> %a)
205+
ret { <3 x float>, <3 x float> } %result
206+
}
207+
208+
define { <2 x float>, <2 x float> } @test_modf_v2f32(<2 x float> %a) {
209+
; CHECK-LABEL: test_modf_v2f32:
210+
; CHECK: # %bb.0:
211+
; CHECK-NEXT: mflr r0
212+
; CHECK-NEXT: stdu r1, -112(r1)
213+
; CHECK-NEXT: std r0, 128(r1)
214+
; CHECK-NEXT: .cfi_def_cfa_offset 112
215+
; CHECK-NEXT: .cfi_offset lr, 16
216+
; CHECK-NEXT: .cfi_offset r29, -24
217+
; CHECK-NEXT: .cfi_offset r30, -16
218+
; CHECK-NEXT: .cfi_offset v30, -64
219+
; CHECK-NEXT: .cfi_offset v31, -48
220+
; CHECK-NEXT: xxsldwi vs0, v2, v2, 3
221+
; CHECK-NEXT: std r30, 96(r1) # 8-byte Folded Spill
222+
; CHECK-NEXT: addi r30, r1, 40
223+
; CHECK-NEXT: std r29, 88(r1) # 8-byte Folded Spill
224+
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
225+
; CHECK-NEXT: mr r4, r30
226+
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
227+
; CHECK-NEXT: xscvspdpn f1, vs0
228+
; CHECK-NEXT: vmr v31, v2
229+
; CHECK-NEXT: bl modff
230+
; CHECK-NEXT: nop
231+
; CHECK-NEXT: xxswapd vs0, v31
232+
; CHECK-NEXT: addi r29, r1, 44
233+
; CHECK-NEXT: xscvdpspn v30, f1
234+
; CHECK-NEXT: mr r4, r29
235+
; CHECK-NEXT: xscvspdpn f1, vs0
236+
; CHECK-NEXT: bl modff
237+
; CHECK-NEXT: nop
238+
; CHECK-NEXT: xscvdpspn vs0, f1
239+
; CHECK-NEXT: lfiwzx f1, 0, r29
240+
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
241+
; CHECK-NEXT: ld r29, 88(r1) # 8-byte Folded Reload
242+
; CHECK-NEXT: xxmrghw v2, vs0, v30
243+
; CHECK-NEXT: lfiwzx f0, 0, r30
244+
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
245+
; CHECK-NEXT: ld r30, 96(r1) # 8-byte Folded Reload
246+
; CHECK-NEXT: xxmrghw v3, vs1, vs0
247+
; CHECK-NEXT: addi r1, r1, 112
248+
; CHECK-NEXT: ld r0, 16(r1)
249+
; CHECK-NEXT: mtlr r0
250+
; CHECK-NEXT: blr
251+
%result = call { <2 x float>, <2 x float> } @llvm.modf.v2f32(<2 x float> %a)
252+
ret { <2 x float>, <2 x float> } %result
253+
}
254+
255+
define { double, double } @test_modf_f64(double %a) {
256+
; CHECK-LABEL: test_modf_f64:
257+
; CHECK: # %bb.0:
258+
; CHECK-NEXT: mflr r0
259+
; CHECK-NEXT: stdu r1, -48(r1)
260+
; CHECK-NEXT: std r0, 64(r1)
261+
; CHECK-NEXT: .cfi_def_cfa_offset 48
262+
; CHECK-NEXT: .cfi_offset lr, 16
263+
; CHECK-NEXT: addi r4, r1, 40
264+
; CHECK-NEXT: bl modf
265+
; CHECK-NEXT: nop
266+
; CHECK-NEXT: lfd f2, 40(r1)
267+
; CHECK-NEXT: addi r1, r1, 48
268+
; CHECK-NEXT: ld r0, 16(r1)
269+
; CHECK-NEXT: mtlr r0
270+
; CHECK-NEXT: blr
271+
%result = call { double, double } @llvm.modf.f64(double %a)
272+
ret { double, double } %result
273+
}
274+
275+
define { <2 x double>, <2 x double> } @test_modf_v2f64(<2 x double> %a) {
276+
; CHECK-LABEL: test_modf_v2f64:
277+
; CHECK: # %bb.0:
278+
; CHECK-NEXT: mflr r0
279+
; CHECK-NEXT: stdu r1, -80(r1)
280+
; CHECK-NEXT: std r0, 96(r1)
281+
; CHECK-NEXT: .cfi_def_cfa_offset 80
282+
; CHECK-NEXT: .cfi_offset lr, 16
283+
; CHECK-NEXT: .cfi_offset v30, -32
284+
; CHECK-NEXT: .cfi_offset v31, -16
285+
; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill
286+
; CHECK-NEXT: vmr v31, v2
287+
; CHECK-NEXT: addi r4, r1, 32
288+
; CHECK-NEXT: xscpsgndp f1, v31, v31
289+
; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill
290+
; CHECK-NEXT: bl modf
291+
; CHECK-NEXT: nop
292+
; CHECK-NEXT: xscpsgndp v30, f1, f1
293+
; CHECK-NEXT: xxswapd vs1, v31
294+
; CHECK-NEXT: addi r4, r1, 40
295+
; CHECK-NEXT: bl modf
296+
; CHECK-NEXT: nop
297+
; CHECK-NEXT: xxmrghd v2, v30, vs1
298+
; CHECK-NEXT: lfd f0, 32(r1)
299+
; CHECK-NEXT: lfd f1, 40(r1)
300+
; CHECK-NEXT: lxv v31, 64(r1) # 16-byte Folded Reload
301+
; CHECK-NEXT: lxv v30, 48(r1) # 16-byte Folded Reload
302+
; CHECK-NEXT: xxmrghd v3, vs0, vs1
303+
; CHECK-NEXT: addi r1, r1, 80
304+
; CHECK-NEXT: ld r0, 16(r1)
305+
; CHECK-NEXT: mtlr r0
306+
; CHECK-NEXT: blr
307+
%result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %a)
308+
ret { <2 x double>, <2 x double> } %result
309+
}
310+
311+
define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
312+
; CHECK-LABEL: test_modf_ppcf128:
313+
; CHECK: # %bb.0:
314+
; CHECK-NEXT: mflr r0
315+
; CHECK-NEXT: stdu r1, -48(r1)
316+
; CHECK-NEXT: std r0, 64(r1)
317+
; CHECK-NEXT: .cfi_def_cfa_offset 48
318+
; CHECK-NEXT: .cfi_offset lr, 16
319+
; CHECK-NEXT: addi r5, r1, 32
320+
; CHECK-NEXT: bl modfl
321+
; CHECK-NEXT: nop
322+
; CHECK-NEXT: lfd f3, 32(r1)
323+
; CHECK-NEXT: lfd f4, 40(r1)
324+
; CHECK-NEXT: addi r1, r1, 48
325+
; CHECK-NEXT: ld r0, 16(r1)
326+
; CHECK-NEXT: mtlr r0
327+
; CHECK-NEXT: blr
328+
%result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
329+
ret { ppc_fp128, ppc_fp128 } %result
330+
}

0 commit comments

Comments
 (0)