@@ -207,140 +207,38 @@ define <4 x float> @fneg(<4 x float> %Q) nounwind {
207
207
208
208
; store(fneg(load())) - convert scalar to integer
209
209
define void @fneg_int_rmw_half (ptr %ptr ) nounwind {
210
- ; X86-SSE1-LABEL: fneg_int_rmw_half:
211
- ; X86-SSE1: # %bb.0:
212
- ; X86-SSE1-NEXT: pushl %esi
213
- ; X86-SSE1-NEXT: subl $8, %esp
214
- ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
215
- ; X86-SSE1-NEXT: movzwl (%esi), %eax
216
- ; X86-SSE1-NEXT: movl %eax, (%esp)
217
- ; X86-SSE1-NEXT: calll __gnu_h2f_ieee
218
- ; X86-SSE1-NEXT: fstps {{[0-9]+}}(%esp)
219
- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220
- ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
221
- ; X86-SSE1-NEXT: movss %xmm0, (%esp)
222
- ; X86-SSE1-NEXT: calll __gnu_f2h_ieee
223
- ; X86-SSE1-NEXT: movw %ax, (%esi)
224
- ; X86-SSE1-NEXT: addl $8, %esp
225
- ; X86-SSE1-NEXT: popl %esi
226
- ; X86-SSE1-NEXT: retl
227
- ;
228
- ; X86-SSE2-LABEL: fneg_int_rmw_half:
229
- ; X86-SSE2: # %bb.0:
230
- ; X86-SSE2-NEXT: pushl %esi
231
- ; X86-SSE2-NEXT: subl $8, %esp
232
- ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
233
- ; X86-SSE2-NEXT: pinsrw $0, (%esi), %xmm0
234
- ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
235
- ; X86-SSE2-NEXT: movw %ax, (%esp)
236
- ; X86-SSE2-NEXT: calll __extendhfsf2
237
- ; X86-SSE2-NEXT: fstps {{[0-9]+}}(%esp)
238
- ; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
239
- ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
240
- ; X86-SSE2-NEXT: movd %xmm0, (%esp)
241
- ; X86-SSE2-NEXT: calll __truncsfhf2
242
- ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
243
- ; X86-SSE2-NEXT: movw %ax, (%esi)
244
- ; X86-SSE2-NEXT: addl $8, %esp
245
- ; X86-SSE2-NEXT: popl %esi
246
- ; X86-SSE2-NEXT: retl
247
- ;
248
- ; X64-SSE1-LABEL: fneg_int_rmw_half:
249
- ; X64-SSE1: # %bb.0:
250
- ; X64-SSE1-NEXT: pushq %rbx
251
- ; X64-SSE1-NEXT: movq %rdi, %rbx
252
- ; X64-SSE1-NEXT: movzwl (%rdi), %edi
253
- ; X64-SSE1-NEXT: callq __gnu_h2f_ieee@PLT
254
- ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
255
- ; X64-SSE1-NEXT: callq __gnu_f2h_ieee@PLT
256
- ; X64-SSE1-NEXT: movw %ax, (%rbx)
257
- ; X64-SSE1-NEXT: popq %rbx
258
- ; X64-SSE1-NEXT: retq
210
+ ; X86-SSE-LABEL: fneg_int_rmw_half:
211
+ ; X86-SSE: # %bb.0:
212
+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
213
+ ; X86-SSE-NEXT: xorb $-128, 1(%eax)
214
+ ; X86-SSE-NEXT: retl
259
215
;
260
- ; X64-SSE2-LABEL: fneg_int_rmw_half:
261
- ; X64-SSE2: # %bb.0:
262
- ; X64-SSE2-NEXT: pushq %rbx
263
- ; X64-SSE2-NEXT: movq %rdi, %rbx
264
- ; X64-SSE2-NEXT: pinsrw $0, (%rdi), %xmm0
265
- ; X64-SSE2-NEXT: callq __extendhfsf2@PLT
266
- ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
267
- ; X64-SSE2-NEXT: callq __truncsfhf2@PLT
268
- ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
269
- ; X64-SSE2-NEXT: movw %ax, (%rbx)
270
- ; X64-SSE2-NEXT: popq %rbx
271
- ; X64-SSE2-NEXT: retq
216
+ ; X64-SSE-LABEL: fneg_int_rmw_half:
217
+ ; X64-SSE: # %bb.0:
218
+ ; X64-SSE-NEXT: xorb $-128, 1(%rdi)
219
+ ; X64-SSE-NEXT: retq
272
220
%1 = load half , ptr %ptr
273
221
%2 = fneg half %1
274
222
store half %2 , ptr %ptr
275
223
ret void
276
224
}
277
225
278
226
define void @fneg_int_bfloat (ptr %src , ptr %dst ) nounwind {
279
- ; X86-SSE1-LABEL: fneg_int_bfloat:
280
- ; X86-SSE1: # %bb.0:
281
- ; X86-SSE1-NEXT: pushl %esi
282
- ; X86-SSE1-NEXT: subl $8, %esp
283
- ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
284
- ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
285
- ; X86-SSE1-NEXT: movzwl (%eax), %eax
286
- ; X86-SSE1-NEXT: shll $16, %eax
287
- ; X86-SSE1-NEXT: movl %eax, {{[0-9]+}}(%esp)
288
- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
289
- ; X86-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
290
- ; X86-SSE1-NEXT: movss %xmm0, (%esp)
291
- ; X86-SSE1-NEXT: calll __truncsfbf2
292
- ; X86-SSE1-NEXT: movw %ax, (%esi)
293
- ; X86-SSE1-NEXT: addl $8, %esp
294
- ; X86-SSE1-NEXT: popl %esi
295
- ; X86-SSE1-NEXT: retl
296
- ;
297
- ; X86-SSE2-LABEL: fneg_int_bfloat:
298
- ; X86-SSE2: # %bb.0:
299
- ; X86-SSE2-NEXT: pushl %esi
300
- ; X86-SSE2-NEXT: pushl %eax
301
- ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
302
- ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
303
- ; X86-SSE2-NEXT: movzwl (%eax), %eax
304
- ; X86-SSE2-NEXT: shll $16, %eax
305
- ; X86-SSE2-NEXT: movd %eax, %xmm0
306
- ; X86-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
307
- ; X86-SSE2-NEXT: movd %xmm0, (%esp)
308
- ; X86-SSE2-NEXT: calll __truncsfbf2
309
- ; X86-SSE2-NEXT: pextrw $0, %xmm0, %eax
310
- ; X86-SSE2-NEXT: movw %ax, (%esi)
311
- ; X86-SSE2-NEXT: addl $4, %esp
312
- ; X86-SSE2-NEXT: popl %esi
313
- ; X86-SSE2-NEXT: retl
314
- ;
315
- ; X64-SSE1-LABEL: fneg_int_bfloat:
316
- ; X64-SSE1: # %bb.0:
317
- ; X64-SSE1-NEXT: pushq %rbx
318
- ; X64-SSE1-NEXT: subq $16, %rsp
319
- ; X64-SSE1-NEXT: movq %rsi, %rbx
320
- ; X64-SSE1-NEXT: movzwl (%rdi), %eax
321
- ; X64-SSE1-NEXT: shll $16, %eax
322
- ; X64-SSE1-NEXT: movl %eax, {{[0-9]+}}(%rsp)
323
- ; X64-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
324
- ; X64-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
325
- ; X64-SSE1-NEXT: callq __truncsfbf2@PLT
326
- ; X64-SSE1-NEXT: movw %ax, (%rbx)
327
- ; X64-SSE1-NEXT: addq $16, %rsp
328
- ; X64-SSE1-NEXT: popq %rbx
329
- ; X64-SSE1-NEXT: retq
227
+ ; X86-SSE-LABEL: fneg_int_bfloat:
228
+ ; X86-SSE: # %bb.0:
229
+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
230
+ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
231
+ ; X86-SSE-NEXT: movzwl (%ecx), %ecx
232
+ ; X86-SSE-NEXT: xorl $32768, %ecx # imm = 0x8000
233
+ ; X86-SSE-NEXT: movw %cx, (%eax)
234
+ ; X86-SSE-NEXT: retl
330
235
;
331
- ; X64-SSE2-LABEL: fneg_int_bfloat:
332
- ; X64-SSE2: # %bb.0:
333
- ; X64-SSE2-NEXT: pushq %rbx
334
- ; X64-SSE2-NEXT: movq %rsi, %rbx
335
- ; X64-SSE2-NEXT: movzwl (%rdi), %eax
336
- ; X64-SSE2-NEXT: shll $16, %eax
337
- ; X64-SSE2-NEXT: movd %eax, %xmm0
338
- ; X64-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
339
- ; X64-SSE2-NEXT: callq __truncsfbf2@PLT
340
- ; X64-SSE2-NEXT: pextrw $0, %xmm0, %eax
341
- ; X64-SSE2-NEXT: movw %ax, (%rbx)
342
- ; X64-SSE2-NEXT: popq %rbx
343
- ; X64-SSE2-NEXT: retq
236
+ ; X64-SSE-LABEL: fneg_int_bfloat:
237
+ ; X64-SSE: # %bb.0:
238
+ ; X64-SSE-NEXT: movzwl (%rdi), %eax
239
+ ; X64-SSE-NEXT: xorl $32768, %eax # imm = 0x8000
240
+ ; X64-SSE-NEXT: movw %ax, (%rsi)
241
+ ; X64-SSE-NEXT: retq
344
242
%1 = load bfloat, ptr %src
345
243
%2 = fneg bfloat %1
346
244
store bfloat %2 , ptr %dst
0 commit comments