Skip to content

Commit 065e985

Browse files
committed
[SelectionDAG] Make ARITH_FENCE support half and bfloat type
1 parent a015f01 commit 065e985

File tree

3 files changed

+93
-0
lines changed

3 files changed

+93
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2825,6 +2825,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
28252825
report_fatal_error("Do not know how to soft promote this operator's "
28262826
"result!");
28272827

2828+
case ISD::ARITH_FENCE:
2829+
R = SoftPromoteHalfRes_ARITH_FENCE(N); break;
28282830
case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
28292831
case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
28302832
case ISD::EXTRACT_VECTOR_ELT:
@@ -2904,6 +2906,11 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
29042906
SetSoftPromotedHalf(SDValue(N, ResNo), R);
29052907
}
29062908

2909+
// Result handler for ISD::ARITH_FENCE, dispatched from SoftPromoteHalfResult.
// Rebuilds the fence as a new ISD::ARITH_FENCE node whose result type is
// MVT::i16 and whose single operand is operand 0 of N passed through
// BitConvertToInteger; the new node reuses N's debug location (SDLoc(N)).
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ARITH_FENCE(SDNode *N) {
2910+
return DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), MVT::i16,
2911+
BitConvertToInteger(N->getOperand(0)));
2912+
}
2913+
29072914
// Result handler for ISD::BITCAST, dispatched from SoftPromoteHalfResult.
// No new node is built here: the result is simply operand 0 of N converted
// by BitConvertToInteger.
SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) {
29082915
return BitConvertToInteger(N->getOperand(0));
29092916
}

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -726,6 +726,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
726726
void SetSoftPromotedHalf(SDValue Op, SDValue Result);
727727

728728
void SoftPromoteHalfResult(SDNode *N, unsigned ResNo);
729+
SDValue SoftPromoteHalfRes_ARITH_FENCE(SDNode *N);
729730
SDValue SoftPromoteHalfRes_BinOp(SDNode *N);
730731
SDValue SoftPromoteHalfRes_BITCAST(SDNode *N);
731732
SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N);

llvm/test/CodeGen/X86/arithmetic_fence2.ll

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -157,6 +157,91 @@ define <8 x float> @f6(<8 x float> %a) {
157157
ret <8 x float> %3
158158
}
159159

160+
; Half-precision case for llvm.arithmetic.fence.f16. The expected assembly
; below converts through the __extendhfsf2 and __truncsfhf2 libcalls around
; each single-precision addss, for both the X86 and the X64 check prefixes.
; NOTE(review): %t (the fence result) is not used by any later instruction;
; %3 adds %1 and %2 directly -- confirm that this is the intended coverage.
define half @f7(half %a) nounwind {
161+
; X86-LABEL: f7:
162+
; X86: # %bb.0:
163+
; X86-NEXT: subl $12, %esp
164+
; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
165+
; X86-NEXT: pextrw $0, %xmm0, %eax
166+
; X86-NEXT: movw %ax, (%esp)
167+
; X86-NEXT: calll __extendhfsf2
168+
; X86-NEXT: fstps {{[0-9]+}}(%esp)
169+
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
170+
; X86-NEXT: addss %xmm0, %xmm0
171+
; X86-NEXT: movss %xmm0, (%esp)
172+
; X86-NEXT: calll __truncsfhf2
173+
; X86-NEXT: pextrw $0, %xmm0, %eax
174+
; X86-NEXT: movw %ax, (%esp)
175+
; X86-NEXT: calll __extendhfsf2
176+
; X86-NEXT: fstps {{[0-9]+}}(%esp)
177+
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
178+
; X86-NEXT: addss %xmm0, %xmm0
179+
; X86-NEXT: movss %xmm0, (%esp)
180+
; X86-NEXT: calll __truncsfhf2
181+
; X86-NEXT: addl $12, %esp
182+
; X86-NEXT: retl
183+
;
184+
; X64-LABEL: f7:
185+
; X64: # %bb.0:
186+
; X64-NEXT: pushq %rax
187+
; X64-NEXT: callq __extendhfsf2@PLT
188+
; X64-NEXT: addss %xmm0, %xmm0
189+
; X64-NEXT: callq __truncsfhf2@PLT
190+
; X64-NEXT: callq __extendhfsf2@PLT
191+
; X64-NEXT: addss %xmm0, %xmm0
192+
; X64-NEXT: callq __truncsfhf2@PLT
193+
; X64-NEXT: popq %rax
194+
; X64-NEXT: retq
195+
%1 = fadd fast half %a, %a
196+
%t = call half @llvm.arithmetic.fence.f16(half %1)
197+
%2 = fadd fast half %a, %a
198+
%3 = fadd fast half %1, %2
199+
ret half %3
200+
}
201+
202+
; bfloat case for llvm.arithmetic.fence.bf16. The expected assembly below
; shifts the 16-bit value left by 16 (shll $16) before each single-precision
; addss and calls the __truncsfbf2 libcall afterwards, for both the X86 and
; the X64 check prefixes.
; NOTE(review): %t (the fence result) is not used by any later instruction;
; %3 adds %1 and %2 directly -- confirm that this is the intended coverage.
define bfloat @f8(bfloat %a) nounwind {
203+
; X86-LABEL: f8:
204+
; X86: # %bb.0:
205+
; X86-NEXT: pushl %eax
206+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
207+
; X86-NEXT: shll $16, %eax
208+
; X86-NEXT: movd %eax, %xmm0
209+
; X86-NEXT: addss %xmm0, %xmm0
210+
; X86-NEXT: movss %xmm0, (%esp)
211+
; X86-NEXT: calll __truncsfbf2
212+
; X86-NEXT: pextrw $0, %xmm0, %eax
213+
; X86-NEXT: shll $16, %eax
214+
; X86-NEXT: movd %eax, %xmm0
215+
; X86-NEXT: addss %xmm0, %xmm0
216+
; X86-NEXT: movss %xmm0, (%esp)
217+
; X86-NEXT: calll __truncsfbf2
218+
; X86-NEXT: popl %eax
219+
; X86-NEXT: retl
220+
;
221+
; X64-LABEL: f8:
222+
; X64: # %bb.0:
223+
; X64-NEXT: pushq %rax
224+
; X64-NEXT: pextrw $0, %xmm0, %eax
225+
; X64-NEXT: shll $16, %eax
226+
; X64-NEXT: movd %eax, %xmm0
227+
; X64-NEXT: addss %xmm0, %xmm0
228+
; X64-NEXT: callq __truncsfbf2@PLT
229+
; X64-NEXT: pextrw $0, %xmm0, %eax
230+
; X64-NEXT: shll $16, %eax
231+
; X64-NEXT: movd %eax, %xmm0
232+
; X64-NEXT: addss %xmm0, %xmm0
233+
; X64-NEXT: callq __truncsfbf2@PLT
234+
; X64-NEXT: popq %rax
235+
; X64-NEXT: retq
236+
%1 = fadd fast bfloat %a, %a
237+
%t = call bfloat @llvm.arithmetic.fence.bf16(bfloat %1)
238+
%2 = fadd fast bfloat %a, %a
239+
%3 = fadd fast bfloat %1, %2
240+
ret bfloat %3
241+
}
242+
243+
declare half @llvm.arithmetic.fence.f16(half)
244+
declare bfloat @llvm.arithmetic.fence.bf16(bfloat)
160245
declare float @llvm.arithmetic.fence.f32(float)
161246
declare double @llvm.arithmetic.fence.f64(double)
162247
declare <2 x float> @llvm.arithmetic.fence.v2f32(<2 x float>)

0 commit comments

Comments
 (0)