Skip to content

Commit dd7a3d4

Browse files
committed
[X86] Extend llvm#118680 - support f16/bf16 fabs/fneg load-store patterns
1 parent ed9915f commit dd7a3d4

File tree

4 files changed

+40
-194
lines changed

4 files changed

+40
-194
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52662,7 +52662,7 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
5266252662
}
5266352663

5266452664
// Convert scalar fabs/fneg load-store to integer equivalents.
52665-
if ((VT == MVT::f32 || VT == MVT::f64) &&
52665+
if ((VT == MVT::f16 || VT == MVT::bf16 || VT == MVT::f32 || VT == MVT::f64) &&
5266652666
(StoredVal.getOpcode() == ISD::FABS ||
5266752667
StoredVal.getOpcode() == ISD::FNEG) &&
5266852668
ISD::isNormalLoad(StoredVal.getOperand(0).getNode()) &&

llvm/test/CodeGen/X86/combine-fabs.ll

Lines changed: 8 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -175,29 +175,12 @@ define void @combine_fabs_int_f32(ptr %src, ptr %dst) {
175175
define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
176176
; SSE-LABEL: combine_fabs_int_rmw_bfloat:
177177
; SSE: # %bb.0:
178-
; SSE-NEXT: pushq %rbx
179-
; SSE-NEXT: movq %rdi, %rbx
180-
; SSE-NEXT: movzwl (%rdi), %eax
181-
; SSE-NEXT: shll $16, %eax
182-
; SSE-NEXT: movd %eax, %xmm0
183-
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
184-
; SSE-NEXT: callq __truncsfbf2@PLT
185-
; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
186-
; SSE-NEXT: popq %rbx
178+
; SSE-NEXT: andb $127, 1(%rdi)
187179
; SSE-NEXT: retq
188180
;
189181
; AVX-LABEL: combine_fabs_int_rmw_bfloat:
190182
; AVX: # %bb.0:
191-
; AVX-NEXT: pushq %rbx
192-
; AVX-NEXT: movq %rdi, %rbx
193-
; AVX-NEXT: movzwl (%rdi), %eax
194-
; AVX-NEXT: shll $16, %eax
195-
; AVX-NEXT: vmovd %eax, %xmm0
196-
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
197-
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
198-
; AVX-NEXT: callq __truncsfbf2@PLT
199-
; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
200-
; AVX-NEXT: popq %rbx
183+
; AVX-NEXT: andb $127, 1(%rdi)
201184
; AVX-NEXT: retq
202185
%1 = load bfloat, ptr %ptr
203186
%2 = call bfloat @llvm.fabs.bf16(bfloat %1)
@@ -208,27 +191,16 @@ define void @combine_fabs_int_rmw_bfloat(ptr %ptr) nounwind {
208191
define void @combine_fabs_int_half(ptr %src, ptr %dst) nounwind {
209192
; SSE-LABEL: combine_fabs_int_half:
210193
; SSE: # %bb.0:
211-
; SSE-NEXT: pushq %rbx
212-
; SSE-NEXT: movq %rsi, %rbx
213-
; SSE-NEXT: pinsrw $0, (%rdi), %xmm0
214-
; SSE-NEXT: callq __extendhfsf2@PLT
215-
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
216-
; SSE-NEXT: callq __truncsfhf2@PLT
217-
; SSE-NEXT: pextrw $0, %xmm0, (%rbx)
218-
; SSE-NEXT: popq %rbx
194+
; SSE-NEXT: movzwl (%rdi), %eax
195+
; SSE-NEXT: andl $32767, %eax # imm = 0x7FFF
196+
; SSE-NEXT: movw %ax, (%rsi)
219197
; SSE-NEXT: retq
220198
;
221199
; AVX-LABEL: combine_fabs_int_half:
222200
; AVX: # %bb.0:
223-
; AVX-NEXT: pushq %rbx
224-
; AVX-NEXT: movq %rsi, %rbx
225-
; AVX-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0
226-
; AVX-NEXT: callq __extendhfsf2@PLT
227-
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
228-
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
229-
; AVX-NEXT: callq __truncsfhf2@PLT
230-
; AVX-NEXT: vpextrw $0, %xmm0, (%rbx)
231-
; AVX-NEXT: popq %rbx
201+
; AVX-NEXT: movzwl (%rdi), %eax
202+
; AVX-NEXT: andl $32767, %eax # imm = 0x7FFF
203+
; AVX-NEXT: movw %ax, (%rsi)
232204
; AVX-NEXT: retq
233205
%1 = load half, ptr %src
234206
%2 = call half @llvm.fabs.f16(half %1)

llvm/test/CodeGen/X86/combine-fneg.ll

Lines changed: 23 additions & 125 deletions
Original file line number | Diff line number | Diff line change
@@ -207,140 +207,38 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
207207

208208
; store(fneg(load())) - convert scalar to integer
209209
define void @fneg_int_rmw_half(ptr %ptr) nounwind {
210-
; X86-SSE1-LABEL: fneg_int_rmw_half:
211-
; X86-SSE1: # %bb.0:
212-
; X86-SSE1-NEXT: pushl %esi
213-
; X86-SSE1-NEXT: subl $8, %esp
214-
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
215-
; X86-SSE1-NEXT: movzwl (%esi), %eax
216-
; X86-SSE1-NEXT: movl %eax, (%esp)
217-
; X86-SSE1-NEXT: calll __gnu_h2f_ieee
218-
; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
219-
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220-
; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
221-
; X86-SSE1-NEXT: movss %xmm0, (%esp)
222-
; X86-SSE1-NEXT: calll __gnu_f2h_ieee
223-
; X86-SSE1-NEXT: movw %ax, (%esi)
224-
; X86-SSE1-NEXT: addl $8, %esp
225-
; X86-SSE1-NEXT: popl %esi
226-
; X86-SSE1-NEXT: retl
227-
;
228-
; X86-SSE2-LABEL: fneg_int_rmw_half:
229-
; X86-SSE2: # %bb.0:
230-
; X86-SSE2-NEXT: pushl %esi
231-
; X86-SSE2-NEXT: subl $8, %esp
232-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
233-
; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
234-
; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
235-
; X86-SSE2-NEXT: movw %ax, (%esp)
236-
; X86-SSE2-NEXT: calll __extendhfsf2
237-
; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
238-
; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
239-
; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
240-
; X86-SSE2-NEXT: movd %xmm0, (%esp)
241-
; X86-SSE2-NEXT: calll __truncsfhf2
242-
; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
243-
; X86-SSE2-NEXT: movw %ax, (%esi)
244-
; X86-SSE2-NEXT: addl $8, %esp
245-
; X86-SSE2-NEXT: popl %esi
246-
; X86-SSE2-NEXT: retl
247-
;
248-
; X64-SSE1-LABEL: fneg_int_rmw_half:
249-
; X64-SSE1: # %bb.0:
250-
; X64-SSE1-NEXT: pushq %rbx
251-
; X64-SSE1-NEXT: movq %rdi, %rbx
252-
; X64-SSE1-NEXT: movzwl (%rdi), %edi
253-
; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
254-
; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
255-
; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
256-
; X64-SSE1-NEXT: movw %ax, (%rbx)
257-
; X64-SSE1-NEXT: popq %rbx
258-
; X64-SSE1-NEXT: retq
210+
; X86-SSE-LABEL: fneg_int_rmw_half:
211+
; X86-SSE: # %bb.0:
212+
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
213+
; X86-SSE-NEXT: xorb $-128, 1(%eax)
214+
; X86-SSE-NEXT: retl
259215
;
260-
; X64-SSE2-LABEL: fneg_int_rmw_half:
261-
; X64-SSE2: # %bb.0:
262-
; X64-SSE2-NEXT: pushq %rbx
263-
; X64-SSE2-NEXT: movq %rdi, %rbx
264-
; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
265-
; X64-SSE2-NEXT: callq __extendhfsf2@PLT
266-
; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267-
; X64-SSE2-NEXT: callq __truncsfhf2@PLT
268-
; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
269-
; X64-SSE2-NEXT: movw %ax, (%rbx)
270-
; X64-SSE2-NEXT: popq %rbx
271-
; X64-SSE2-NEXT: retq
216+
; X64-SSE-LABEL: fneg_int_rmw_half:
217+
; X64-SSE: # %bb.0:
218+
; X64-SSE-NEXT: xorb $-128, 1(%rdi)
219+
; X64-SSE-NEXT: retq
272220
%1 = load half, ptr %ptr
273221
%2 = fneg half %1
274222
store half %2, ptr %ptr
275223
ret void
276224
}
277225

278226
define void @fneg_int_bfloat(ptr %src, ptr %dst) nounwind {
279-
; X86-SSE1-LABEL: fneg_int_bfloat:
280-
; X86-SSE1: # %bb.0:
281-
; X86-SSE1-NEXT: pushl %esi
282-
; X86-SSE1-NEXT: subl $8, %esp
283-
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
284-
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
285-
; X86-SSE1-NEXT: movzwl (%eax), %eax
286-
; X86-SSE1-NEXT: shll $16, %eax
287-
; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
288-
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
289-
; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
290-
; X86-SSE1-NEXT: movss %xmm0, (%esp)
291-
; X86-SSE1-NEXT: calll __truncsfbf2
292-
; X86-SSE1-NEXT: movw %ax, (%esi)
293-
; X86-SSE1-NEXT: addl $8, %esp
294-
; X86-SSE1-NEXT: popl %esi
295-
; X86-SSE1-NEXT: retl
296-
;
297-
; X86-SSE2-LABEL: fneg_int_bfloat:
298-
; X86-SSE2: # %bb.0:
299-
; X86-SSE2-NEXT: pushl %esi
300-
; X86-SSE2-NEXT: pushl %eax
301-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
302-
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
303-
; X86-SSE2-NEXT: movzwl (%eax), %eax
304-
; X86-SSE2-NEXT: shll $16, %eax
305-
; X86-SSE2-NEXT: movd %eax, %xmm0
306-
; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
307-
; X86-SSE2-NEXT: movd %xmm0, (%esp)
308-
; X86-SSE2-NEXT: calll __truncsfbf2
309-
; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
310-
; X86-SSE2-NEXT: movw %ax, (%esi)
311-
; X86-SSE2-NEXT: addl $4, %esp
312-
; X86-SSE2-NEXT: popl %esi
313-
; X86-SSE2-NEXT: retl
314-
;
315-
; X64-SSE1-LABEL: fneg_int_bfloat:
316-
; X64-SSE1: # %bb.0:
317-
; X64-SSE1-NEXT: pushq %rbx
318-
; X64-SSE1-NEXT: subq $16, %rsp
319-
; X64-SSE1-NEXT: movq %rsi, %rbx
320-
; X64-SSE1-NEXT: movzwl (%rdi), %eax
321-
; X64-SSE1-NEXT: shll $16, %eax
322-
; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
323-
; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324-
; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
325-
; X64-SSE1-NEXT: callq __truncsfbf2@PLT
326-
; X64-SSE1-NEXT: movw %ax, (%rbx)
327-
; X64-SSE1-NEXT: addq $16, %rsp
328-
; X64-SSE1-NEXT: popq %rbx
329-
; X64-SSE1-NEXT: retq
227+
; X86-SSE-LABEL: fneg_int_bfloat:
228+
; X86-SSE: # %bb.0:
229+
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
230+
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
231+
; X86-SSE-NEXT: movzwl (%ecx), %ecx
232+
; X86-SSE-NEXT: xorl $32768, %ecx # imm = 0x8000
233+
; X86-SSE-NEXT: movw %cx, (%eax)
234+
; X86-SSE-NEXT: retl
330235
;
331-
; X64-SSE2-LABEL: fneg_int_bfloat:
332-
; X64-SSE2: # %bb.0:
333-
; X64-SSE2-NEXT: pushq %rbx
334-
; X64-SSE2-NEXT: movq %rsi, %rbx
335-
; X64-SSE2-NEXT: movzwl (%rdi), %eax
336-
; X64-SSE2-NEXT: shll $16, %eax
337-
; X64-SSE2-NEXT: movd %eax, %xmm0
338-
; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339-
; X64-SSE2-NEXT: callq __truncsfbf2@PLT
340-
; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
341-
; X64-SSE2-NEXT: movw %ax, (%rbx)
342-
; X64-SSE2-NEXT: popq %rbx
343-
; X64-SSE2-NEXT: retq
236+
; X64-SSE-LABEL: fneg_int_bfloat:
237+
; X64-SSE: # %bb.0:
238+
; X64-SSE-NEXT: movzwl (%rdi), %eax
239+
; X64-SSE-NEXT: xorl $32768, %eax # imm = 0x8000
240+
; X64-SSE-NEXT: movw %ax, (%rsi)
241+
; X64-SSE-NEXT: retq
344242
%1 = load bfloat, ptr %src
345243
%2 = fneg bfloat %1
346244
store bfloat %2, ptr %dst

llvm/test/CodeGen/X86/fp16-libcalls.ll

Lines changed: 8 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -379,22 +379,10 @@ define void @test_half_fabs(half %a0, ptr %p0) nounwind {
379379
;
380380
; X86-LABEL: test_half_fabs:
381381
; X86: # %bb.0:
382-
; X86-NEXT: pushl %esi
383-
; X86-NEXT: subl $8, %esp
384-
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
385-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
386-
; X86-NEXT: pextrw $0, %xmm0, %eax
387-
; X86-NEXT: movw %ax, (%esp)
388-
; X86-NEXT: calll __extendhfsf2
389-
; X86-NEXT: fstps {{[0-9]+}}(%esp)
390-
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
391-
; X86-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
392-
; X86-NEXT: movd %xmm0, (%esp)
393-
; X86-NEXT: calll __truncsfhf2
394-
; X86-NEXT: pextrw $0, %xmm0, %eax
395-
; X86-NEXT: movw %ax, (%esi)
396-
; X86-NEXT: addl $8, %esp
397-
; X86-NEXT: popl %esi
382+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
383+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
384+
; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
385+
; X86-NEXT: movw %cx, (%eax)
398386
; X86-NEXT: retl
399387
%res = call half @llvm.fabs.half(half %a0)
400388
store half %res, ptr %p0, align 2
@@ -584,22 +572,10 @@ define void @test_half_fneg(half %a0, ptr %p0) nounwind {
584572
;
585573
; X86-LABEL: test_half_fneg:
586574
; X86: # %bb.0:
587-
; X86-NEXT: pushl %esi
588-
; X86-NEXT: subl $8, %esp
589-
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
590-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
591-
; X86-NEXT: pextrw $0, %xmm0, %eax
592-
; X86-NEXT: movw %ax, (%esp)
593-
; X86-NEXT: calll __extendhfsf2
594-
; X86-NEXT: fstps {{[0-9]+}}(%esp)
595-
; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
596-
; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
597-
; X86-NEXT: movd %xmm0, (%esp)
598-
; X86-NEXT: calll __truncsfhf2
599-
; X86-NEXT: pextrw $0, %xmm0, %eax
600-
; X86-NEXT: movw %ax, (%esi)
601-
; X86-NEXT: addl $8, %esp
602-
; X86-NEXT: popl %esi
575+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
576+
; X86-NEXT: movl $32768, %ecx # imm = 0x8000
577+
; X86-NEXT: xorl {{[0-9]+}}(%esp), %ecx
578+
; X86-NEXT: movw %cx, (%eax)
603579
; X86-NEXT: retl
604580
%res = fneg half %a0
605581
store half %res, ptr %p0, align 2

0 commit comments

Comments
 (0)