@@ -159,8 +159,8 @@ define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
159
159
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
160
160
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
161
161
; X86-AVX512-NEXT: vmovdqa (%ecx), %xmm1
162
- ; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN ]
163
- ; X86-AVX512-NEXT: vpternlogq $202, (%eax), %xmm1, %xmm0
162
+ ; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879 ]
163
+ ; X86-AVX512-NEXT: vpternlogd $202, (%eax), %xmm1, %xmm0
164
164
; X86-AVX512-NEXT: retl
165
165
;
166
166
; X64-SSE-LABEL: fcopysign_v8f16:
@@ -193,8 +193,8 @@ define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
193
193
; X64-AVX512-LABEL: fcopysign_v8f16:
194
194
; X64-AVX512: # %bb.0:
195
195
; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm1
196
- ; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223231297218904063,9223231297218904063 ]
197
- ; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %xmm1, %xmm0
196
+ ; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879 ]
197
+ ; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %xmm1, %xmm0
198
198
; X64-AVX512-NEXT: retq
199
199
%a0 = load <8 x half >, ptr %p0 , align 16
200
200
%a1 = load <8 x half >, ptr %p1 , align 16
@@ -405,8 +405,8 @@ define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
405
405
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
406
406
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
407
407
; X86-AVX512-NEXT: vmovdqu (%ecx), %ymm1
408
- ; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN ]
409
- ; X86-AVX512-NEXT: vpternlogq $202, (%eax), %ymm1, %ymm0
408
+ ; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879 ]
409
+ ; X86-AVX512-NEXT: vpternlogd $202, (%eax), %ymm1, %ymm0
410
410
; X86-AVX512-NEXT: retl
411
411
;
412
412
; X64-SSE-LABEL: fcopysign_v16f16:
@@ -444,8 +444,8 @@ define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
444
444
; X64-AVX512-LABEL: fcopysign_v16f16:
445
445
; X64-AVX512: # %bb.0:
446
446
; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm1
447
- ; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063 ]
448
- ; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %ymm1, %ymm0
447
+ ; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879 ]
448
+ ; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %ymm1, %ymm0
449
449
; X64-AVX512-NEXT: retq
450
450
%a0 = load <16 x half >, ptr %p0 , align 16
451
451
%a1 = load <16 x half >, ptr %p1 , align 16
@@ -691,34 +691,14 @@ define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
691
691
; X86-AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
692
692
; X86-AVX2-NEXT: retl
693
693
;
694
- ; X86-AVX512VL-LABEL: fcopysign_v32f16:
695
- ; X86-AVX512VL: # %bb.0:
696
- ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
697
- ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx
698
- ; X86-AVX512VL-NEXT: vmovdqu64 (%ecx), %zmm1
699
- ; X86-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
700
- ; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
701
- ; X86-AVX512VL-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
702
- ; X86-AVX512VL-NEXT: retl
703
- ;
704
- ; X86-AVX512FP16-LABEL: fcopysign_v32f16:
705
- ; X86-AVX512FP16: # %bb.0:
706
- ; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
707
- ; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %ecx
708
- ; X86-AVX512FP16-NEXT: vmovdqu64 (%ecx), %zmm1
709
- ; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
710
- ; X86-AVX512FP16-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
711
- ; X86-AVX512FP16-NEXT: retl
712
- ;
713
- ; X86-AVX512VLDQ-LABEL: fcopysign_v32f16:
714
- ; X86-AVX512VLDQ: # %bb.0:
715
- ; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
716
- ; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
717
- ; X86-AVX512VLDQ-NEXT: vmovdqu64 (%ecx), %zmm1
718
- ; X86-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
719
- ; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
720
- ; X86-AVX512VLDQ-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
721
- ; X86-AVX512VLDQ-NEXT: retl
694
+ ; X86-AVX512-LABEL: fcopysign_v32f16:
695
+ ; X86-AVX512: # %bb.0:
696
+ ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
697
+ ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
698
+ ; X86-AVX512-NEXT: vmovdqu64 (%ecx), %zmm1
699
+ ; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
700
+ ; X86-AVX512-NEXT: vpternlogd $202, (%eax), %zmm1, %zmm0
701
+ ; X86-AVX512-NEXT: retl
722
702
;
723
703
; X64-SSE-LABEL: fcopysign_v32f16:
724
704
; X64-SSE: # %bb.0:
@@ -769,8 +749,8 @@ define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
769
749
; X64-AVX512-LABEL: fcopysign_v32f16:
770
750
; X64-AVX512: # %bb.0:
771
751
; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm1
772
- ; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063 ]
773
- ; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %zmm1, %zmm0
752
+ ; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879 ]
753
+ ; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %zmm1, %zmm0
774
754
; X64-AVX512-NEXT: retq
775
755
%a0 = load <32 x half >, ptr %p0 , align 16
776
756
%a1 = load <32 x half >, ptr %p1 , align 16
@@ -786,3 +766,6 @@ declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
786
766
; X64-AVX512VLDQ: {{.*}}
787
767
; X86: {{.*}}
788
768
; X86-AVX: {{.*}}
769
+ ; X86-AVX512FP16: {{.*}}
770
+ ; X86-AVX512VL: {{.*}}
771
+ ; X86-AVX512VLDQ: {{.*}}
0 commit comments