@@ -157,6 +157,91 @@ define <8 x float> @f6(<8 x float> %a) {
157
157
ret <8 x float > %3
158
158
}
159
159
160
+ define half @f7 (half %a ) nounwind { ; half case: (a + a) + (a + a) with an arithmetic.fence call on the first sum
161
+ ; X86-LABEL: f7:
162
+ ; X86: # %bb.0:
163
+ ; X86-NEXT: subl $12, %esp
164
+ ; X86-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
165
+ ; X86-NEXT: pextrw $0, %xmm0, %eax
166
+ ; X86-NEXT: movw %ax, (%esp)
167
+ ; X86-NEXT: calll __extendhfsf2
168
+ ; X86-NEXT: fstps {{[0-9]+}}(%esp)
169
+ ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
170
+ ; X86-NEXT: addss %xmm0, %xmm0
171
+ ; X86-NEXT: movss %xmm0, (%esp)
172
+ ; X86-NEXT: calll __truncsfhf2
173
+ ; X86-NEXT: pextrw $0, %xmm0, %eax
174
+ ; X86-NEXT: movw %ax, (%esp)
175
+ ; X86-NEXT: calll __extendhfsf2
176
+ ; X86-NEXT: fstps {{[0-9]+}}(%esp)
177
+ ; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
178
+ ; X86-NEXT: addss %xmm0, %xmm0
179
+ ; X86-NEXT: movss %xmm0, (%esp)
180
+ ; X86-NEXT: calll __truncsfhf2
181
+ ; X86-NEXT: addl $12, %esp
182
+ ; X86-NEXT: retl
183
+ ;
184
+ ; X64-LABEL: f7:
185
+ ; X64: # %bb.0:
186
+ ; X64-NEXT: pushq %rax
187
+ ; X64-NEXT: callq __extendhfsf2@PLT
188
+ ; X64-NEXT: addss %xmm0, %xmm0
189
+ ; X64-NEXT: callq __truncsfhf2@PLT
190
+ ; X64-NEXT: callq __extendhfsf2@PLT
191
+ ; X64-NEXT: addss %xmm0, %xmm0
192
+ ; X64-NEXT: callq __truncsfhf2@PLT
193
+ ; X64-NEXT: popq %rax
194
+ ; X64-NEXT: retq
195
+ %1 = fadd fast half %a , %a
196
+ %t = call half @llvm.arithmetic.fence.f16 (half %1 ) ; NOTE(review): %t has no use below (%3 adds %1 and %2) -- confirm whether %3 was meant to consume %t; changing it requires regenerating the CHECK lines
197
+ %2 = fadd fast half %a , %a
198
+ %3 = fadd fast half %1 , %2
199
+ ret half %3
200
+ }
201
+
202
+ define bfloat @f8 (bfloat %a ) nounwind { ; bfloat case: (a + a) + (a + a) with an arithmetic.fence call on the first sum
203
+ ; X86-LABEL: f8:
204
+ ; X86: # %bb.0:
205
+ ; X86-NEXT: pushl %eax
206
+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
207
+ ; X86-NEXT: shll $16, %eax
208
+ ; X86-NEXT: movd %eax, %xmm0
209
+ ; X86-NEXT: addss %xmm0, %xmm0
210
+ ; X86-NEXT: movss %xmm0, (%esp)
211
+ ; X86-NEXT: calll __truncsfbf2
212
+ ; X86-NEXT: pextrw $0, %xmm0, %eax
213
+ ; X86-NEXT: shll $16, %eax
214
+ ; X86-NEXT: movd %eax, %xmm0
215
+ ; X86-NEXT: addss %xmm0, %xmm0
216
+ ; X86-NEXT: movss %xmm0, (%esp)
217
+ ; X86-NEXT: calll __truncsfbf2
218
+ ; X86-NEXT: popl %eax
219
+ ; X86-NEXT: retl
220
+ ;
221
+ ; X64-LABEL: f8:
222
+ ; X64: # %bb.0:
223
+ ; X64-NEXT: pushq %rax
224
+ ; X64-NEXT: pextrw $0, %xmm0, %eax
225
+ ; X64-NEXT: shll $16, %eax
226
+ ; X64-NEXT: movd %eax, %xmm0
227
+ ; X64-NEXT: addss %xmm0, %xmm0
228
+ ; X64-NEXT: callq __truncsfbf2@PLT
229
+ ; X64-NEXT: pextrw $0, %xmm0, %eax
230
+ ; X64-NEXT: shll $16, %eax
231
+ ; X64-NEXT: movd %eax, %xmm0
232
+ ; X64-NEXT: addss %xmm0, %xmm0
233
+ ; X64-NEXT: callq __truncsfbf2@PLT
234
+ ; X64-NEXT: popq %rax
235
+ ; X64-NEXT: retq
236
+ %1 = fadd fast bfloat %a , %a
237
+ %t = call bfloat @llvm.arithmetic.fence.bf16 (bfloat %1 ) ; NOTE(review): %t has no use below (%3 adds %1 and %2) -- confirm whether %3 was meant to consume %t; changing it requires regenerating the CHECK lines
238
+ %2 = fadd fast bfloat %a , %a
239
+ %3 = fadd fast bfloat %1 , %2
240
+ ret bfloat %3
241
+ }
242
+
243
+ declare half @llvm.arithmetic.fence.f16 (half ) ; f16 / bf16 fence overloads called by @f7 and @f8
244
+ declare bfloat @llvm.arithmetic.fence.bf16 (bfloat)
160
245
declare float @llvm.arithmetic.fence.f32 (float )
161
246
declare double @llvm.arithmetic.fence.f64 (double )
162
247
declare <2 x float > @llvm.arithmetic.fence.v2f32 (<2 x float >)
0 commit comments