Skip to content

Commit 4267219

Browse files
authored
[GISel][AArch64] Libcall support for G_FPEXT 128-bit types (#97735)
This patch adds support for generating libcalls for G_FPEXT when the destination is a 128-bit type (s16, s32, or s64 extended to s128). This fixes ~10 GlobalISel fallbacks in the RajaPerf benchmark.
1 parent 7df39ac commit 4267219

File tree

2 files changed

+236
-0
lines changed

2 files changed

+236
-0
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
656656
getActionDefinitionsBuilder(G_FPEXT)
657657
.legalFor(
658658
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
659+
.libcallFor({{s128, s64}, {s128, s32}, {s128, s16}})
659660
.clampNumElements(0, v4s32, v4s32)
660661
.clampNumElements(0, v2s64, v2s64)
661662
.scalarize(0);

llvm/test/CodeGen/AArch64/fpext.ll

+235
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,33 @@ entry:
3232
ret float %c
3333
}
3434

35+
define fp128 @fpext_f16_f128(half %a) {
36+
; CHECK-LABEL: fpext_f16_f128:
37+
; CHECK: // %bb.0: // %entry
38+
; CHECK-NEXT: b __extendhftf2
39+
entry:
40+
%c = fpext half %a to fp128
41+
ret fp128 %c
42+
}
43+
44+
define fp128 @fpext_f32_f128(float %a) {
45+
; CHECK-LABEL: fpext_f32_f128:
46+
; CHECK: // %bb.0: // %entry
47+
; CHECK-NEXT: b __extendsftf2
48+
entry:
49+
%c = fpext float %a to fp128
50+
ret fp128 %c
51+
}
52+
53+
define fp128 @fpext_f64_f128(double %a) {
54+
; CHECK-LABEL: fpext_f64_f128:
55+
; CHECK: // %bb.0: // %entry
56+
; CHECK-NEXT: b __extenddftf2
57+
entry:
58+
%c = fpext double %a to fp128
59+
ret fp128 %c
60+
}
61+
3562
define <2 x double> @fpext_v2f32_v2f64(<2 x float> %a) {
3663
; CHECK-LABEL: fpext_v2f32_v2f64:
3764
; CHECK: // %bb.0: // %entry
@@ -66,6 +93,214 @@ entry:
6693
ret <3 x double> %c
6794
}
6895

96+
define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) {
97+
; CHECK-SD-LABEL: fpext_v4f16_v4f128:
98+
; CHECK-SD: // %bb.0: // %entry
99+
; CHECK-SD-NEXT: sub sp, sp, #64
100+
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
101+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
102+
; CHECK-SD-NEXT: .cfi_offset w30, -16
103+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
104+
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
105+
; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
106+
; CHECK-SD-NEXT: bl __extendhftf2
107+
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
108+
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
109+
; CHECK-SD-NEXT: mov h1, v1.h[1]
110+
; CHECK-SD-NEXT: fmov s0, s1
111+
; CHECK-SD-NEXT: bl __extendhftf2
112+
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
113+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
114+
; CHECK-SD-NEXT: mov h1, v1.h[2]
115+
; CHECK-SD-NEXT: fmov s0, s1
116+
; CHECK-SD-NEXT: bl __extendhftf2
117+
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
118+
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
119+
; CHECK-SD-NEXT: mov h1, v1.h[3]
120+
; CHECK-SD-NEXT: fmov s0, s1
121+
; CHECK-SD-NEXT: bl __extendhftf2
122+
; CHECK-SD-NEXT: mov v3.16b, v0.16b
123+
; CHECK-SD-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
124+
; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
125+
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
126+
; CHECK-SD-NEXT: add sp, sp, #64
127+
; CHECK-SD-NEXT: ret
128+
;
129+
; CHECK-GI-LABEL: fpext_v4f16_v4f128:
130+
; CHECK-GI: // %bb.0: // %entry
131+
; CHECK-GI-NEXT: sub sp, sp, #80
132+
; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
133+
; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
134+
; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
135+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
136+
; CHECK-GI-NEXT: .cfi_offset w30, -8
137+
; CHECK-GI-NEXT: .cfi_offset b8, -16
138+
; CHECK-GI-NEXT: .cfi_offset b9, -24
139+
; CHECK-GI-NEXT: .cfi_offset b10, -32
140+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
141+
; CHECK-GI-NEXT: mov h8, v0.h[1]
142+
; CHECK-GI-NEXT: mov h9, v0.h[2]
143+
; CHECK-GI-NEXT: mov h10, v0.h[3]
144+
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 killed $q0
145+
; CHECK-GI-NEXT: bl __extendhftf2
146+
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
147+
; CHECK-GI-NEXT: fmov s0, s8
148+
; CHECK-GI-NEXT: bl __extendhftf2
149+
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
150+
; CHECK-GI-NEXT: fmov s0, s9
151+
; CHECK-GI-NEXT: bl __extendhftf2
152+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
153+
; CHECK-GI-NEXT: fmov s0, s10
154+
; CHECK-GI-NEXT: bl __extendhftf2
155+
; CHECK-GI-NEXT: mov v3.16b, v0.16b
156+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
157+
; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
158+
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
159+
; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
160+
; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
161+
; CHECK-GI-NEXT: add sp, sp, #80
162+
; CHECK-GI-NEXT: ret
163+
entry:
164+
%c = fpext <4 x half> %a to <4 x fp128>
165+
ret <4 x fp128> %c
166+
}
167+
168+
define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
169+
; CHECK-SD-LABEL: fpext_v4f32_v4f128:
170+
; CHECK-SD: // %bb.0: // %entry
171+
; CHECK-SD-NEXT: sub sp, sp, #80
172+
; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
173+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
174+
; CHECK-SD-NEXT: .cfi_offset w30, -16
175+
; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
176+
; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
177+
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
178+
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
179+
; CHECK-SD-NEXT: bl __extendsftf2
180+
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
181+
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
182+
; CHECK-SD-NEXT: mov s1, v1.s[1]
183+
; CHECK-SD-NEXT: fmov s0, s1
184+
; CHECK-SD-NEXT: bl __extendsftf2
185+
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
186+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
187+
; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
188+
; CHECK-SD-NEXT: bl __extendsftf2
189+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
190+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
191+
; CHECK-SD-NEXT: mov s0, v0.s[1]
192+
; CHECK-SD-NEXT: bl __extendsftf2
193+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
194+
; CHECK-SD-NEXT: ldp q0, q3, [sp] // 32-byte Folded Reload
195+
; CHECK-SD-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload
196+
; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
197+
; CHECK-SD-NEXT: add sp, sp, #80
198+
; CHECK-SD-NEXT: ret
199+
;
200+
; CHECK-GI-LABEL: fpext_v4f32_v4f128:
201+
; CHECK-GI: // %bb.0: // %entry
202+
; CHECK-GI-NEXT: sub sp, sp, #80
203+
; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill
204+
; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill
205+
; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill
206+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
207+
; CHECK-GI-NEXT: .cfi_offset w30, -8
208+
; CHECK-GI-NEXT: .cfi_offset b8, -16
209+
; CHECK-GI-NEXT: .cfi_offset b9, -24
210+
; CHECK-GI-NEXT: .cfi_offset b10, -32
211+
; CHECK-GI-NEXT: mov s8, v0.s[1]
212+
; CHECK-GI-NEXT: mov s9, v0.s[2]
213+
; CHECK-GI-NEXT: mov s10, v0.s[3]
214+
; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0
215+
; CHECK-GI-NEXT: bl __extendsftf2
216+
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
217+
; CHECK-GI-NEXT: fmov s0, s8
218+
; CHECK-GI-NEXT: bl __extendsftf2
219+
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
220+
; CHECK-GI-NEXT: fmov s0, s9
221+
; CHECK-GI-NEXT: bl __extendsftf2
222+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
223+
; CHECK-GI-NEXT: fmov s0, s10
224+
; CHECK-GI-NEXT: bl __extendsftf2
225+
; CHECK-GI-NEXT: mov v3.16b, v0.16b
226+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
227+
; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload
228+
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
229+
; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload
230+
; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload
231+
; CHECK-GI-NEXT: add sp, sp, #80
232+
; CHECK-GI-NEXT: ret
233+
entry:
234+
%c = fpext <4 x float> %a to <4 x fp128>
235+
ret <4 x fp128> %c
236+
}
237+
238+
define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
239+
; CHECK-SD-LABEL: fpext_v4f64_v4f128:
240+
; CHECK-SD: // %bb.0: // %entry
241+
; CHECK-SD-NEXT: sub sp, sp, #80
242+
; CHECK-SD-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
243+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
244+
; CHECK-SD-NEXT: .cfi_offset w30, -16
245+
; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
246+
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
247+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
248+
; CHECK-SD-NEXT: bl __extenddftf2
249+
; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
250+
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
251+
; CHECK-SD-NEXT: mov d0, v0.d[1]
252+
; CHECK-SD-NEXT: bl __extenddftf2
253+
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
254+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
255+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
256+
; CHECK-SD-NEXT: bl __extenddftf2
257+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
258+
; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
259+
; CHECK-SD-NEXT: mov d0, v0.d[1]
260+
; CHECK-SD-NEXT: bl __extenddftf2
261+
; CHECK-SD-NEXT: mov v3.16b, v0.16b
262+
; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
263+
; CHECK-SD-NEXT: ldr q2, [sp] // 16-byte Folded Reload
264+
; CHECK-SD-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
265+
; CHECK-SD-NEXT: add sp, sp, #80
266+
; CHECK-SD-NEXT: ret
267+
;
268+
; CHECK-GI-LABEL: fpext_v4f64_v4f128:
269+
; CHECK-GI: // %bb.0: // %entry
270+
; CHECK-GI-NEXT: sub sp, sp, #80
271+
; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
272+
; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
273+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
274+
; CHECK-GI-NEXT: .cfi_offset w30, -16
275+
; CHECK-GI-NEXT: .cfi_offset b8, -24
276+
; CHECK-GI-NEXT: .cfi_offset b9, -32
277+
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
278+
; CHECK-GI-NEXT: mov d8, v0.d[1]
279+
; CHECK-GI-NEXT: mov d9, v1.d[1]
280+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
281+
; CHECK-GI-NEXT: bl __extenddftf2
282+
; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
283+
; CHECK-GI-NEXT: fmov d0, d8
284+
; CHECK-GI-NEXT: bl __extenddftf2
285+
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
286+
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
287+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
288+
; CHECK-GI-NEXT: bl __extenddftf2
289+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
290+
; CHECK-GI-NEXT: fmov d0, d9
291+
; CHECK-GI-NEXT: bl __extenddftf2
292+
; CHECK-GI-NEXT: mov v3.16b, v0.16b
293+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
294+
; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
295+
; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload
296+
; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
297+
; CHECK-GI-NEXT: add sp, sp, #80
298+
; CHECK-GI-NEXT: ret
299+
entry:
300+
%c = fpext <4 x double> %a to <4 x fp128>
301+
ret <4 x fp128> %c
302+
}
303+
69304
define <4 x double> @fpext_v4f32_v4f64(<4 x float> %a) {
70305
; CHECK-SD-LABEL: fpext_v4f32_v4f64:
71306
; CHECK-SD: // %bb.0: // %entry

0 commit comments

Comments
 (0)