Skip to content

Commit 5f91335

Browse files
committed
[X86] canonicalizeBitSelect - always use VPTERNLOGD for sub-32bit types
We were using VPTERNLOGQ for everything but i32 types, which made broadcasts wider than necessary. Noticed in #73509.
1 parent ffd61c1 commit 5f91335

29 files changed

+381
-398
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48707,7 +48707,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
4870748707
if (useVPTERNLOG(Subtarget, VT)) {
4870848708
// Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
4870948709
// VPTERNLOG is only available as vXi32/64-bit types.
48710-
MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64;
48710+
MVT OpSVT = EltSizeInBits <= 32 ? MVT::i32 : MVT::i64;
4871148711
MVT OpVT =
4871248712
MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());
4871348713
SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));

llvm/test/CodeGen/X86/avx512fp16-arith.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ define half @fcopysign(half %x, half %y) {
329329
; CHECK-LABEL: fcopysign:
330330
; CHECK: ## %bb.0:
331331
; CHECK-NEXT: vpbroadcastw {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
332-
; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
332+
; CHECK-NEXT: vpternlogd $226, %xmm1, %xmm2, %xmm0
333333
; CHECK-NEXT: retq
334334
%a = call half @llvm.copysign.f16(half %x, half %y)
335335
ret half %a
@@ -384,7 +384,7 @@ declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
384384
define <8 x half> @fcopysignv8f16(<8 x half> %x, <8 x half> %y) {
385385
; CHECK-LABEL: fcopysignv8f16:
386386
; CHECK: ## %bb.0:
387-
; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
387+
; CHECK-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
388388
; CHECK-NEXT: retq
389389
%a = call <8 x half> @llvm.copysign.v8f16(<8 x half> %x, <8 x half> %y)
390390
ret <8 x half> %a
@@ -439,7 +439,7 @@ declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
439439
define <16 x half> @fcopysignv16f16(<16 x half> %x, <16 x half> %y) {
440440
; CHECK-LABEL: fcopysignv16f16:
441441
; CHECK: ## %bb.0:
442-
; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
442+
; CHECK-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
443443
; CHECK-NEXT: retq
444444
%a = call <16 x half> @llvm.copysign.v16f16(<16 x half> %x, <16 x half> %y)
445445
ret <16 x half> %a
@@ -494,7 +494,7 @@ declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
494494
define <32 x half> @fcopysignv32f16(<32 x half> %x, <32 x half> %y) {
495495
; CHECK-LABEL: fcopysignv32f16:
496496
; CHECK: ## %bb.0:
497-
; CHECK-NEXT: vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
497+
; CHECK-NEXT: vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
498498
; CHECK-NEXT: retq
499499
%a = call <32 x half> @llvm.copysign.v32f16(<32 x half> %x, <32 x half> %y)
500500
ret <32 x half> %a

llvm/test/CodeGen/X86/gfni-funnel-shifts.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define <16 x i8> @splatconstant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind
3131
; GFNIAVX512: # %bb.0:
3232
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm2
3333
; GFNIAVX512-NEXT: vpsrlw $5, %xmm1, %xmm0
34-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0
34+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm0
3535
; GFNIAVX512-NEXT: retq
3636
%res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
3737
ret <16 x i8> %res
@@ -119,7 +119,7 @@ define <32 x i8> @splatconstant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
119119
; GFNIAVX512: # %bb.0:
120120
; GFNIAVX512-NEXT: vpsllw $4, %ymm0, %ymm2
121121
; GFNIAVX512-NEXT: vpsrlw $4, %ymm1, %ymm0
122-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
122+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0
123123
; GFNIAVX512-NEXT: retq
124124
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
125125
ret <32 x i8> %res
@@ -175,7 +175,7 @@ define <32 x i8> @splatconstant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind
175175
; GFNIAVX512: # %bb.0:
176176
; GFNIAVX512-NEXT: vpsllw $2, %ymm0, %ymm2
177177
; GFNIAVX512-NEXT: vpsrlw $6, %ymm1, %ymm0
178-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm2, %ymm0
178+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm2, %ymm0
179179
; GFNIAVX512-NEXT: retq
180180
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
181181
ret <32 x i8> %res
@@ -339,7 +339,7 @@ define <64 x i8> @splatconstant_fshr_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind
339339
; GFNIAVX512: # %bb.0:
340340
; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm2
341341
; GFNIAVX512-NEXT: vpsrlw $2, %zmm1, %zmm0
342-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
342+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
343343
; GFNIAVX512-NEXT: retq
344344
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %b, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
345345
ret <64 x i8> %res

llvm/test/CodeGen/X86/gfni-rotates.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ define <16 x i8> @splatconstant_rotl_v16i8(<16 x i8> %a) nounwind {
3232
; GFNIAVX512: # %bb.0:
3333
; GFNIAVX512-NEXT: vpsllw $3, %xmm0, %xmm1
3434
; GFNIAVX512-NEXT: vpsrlw $5, %xmm0, %xmm0
35-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
35+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
3636
; GFNIAVX512-NEXT: retq
3737
%res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %a, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
3838
ret <16 x i8> %res
@@ -121,7 +121,7 @@ define <32 x i8> @splatconstant_rotl_v32i8(<32 x i8> %a) nounwind {
121121
; GFNIAVX512: # %bb.0:
122122
; GFNIAVX512-NEXT: vpsllw $4, %ymm0, %ymm1
123123
; GFNIAVX512-NEXT: vpsrlw $4, %ymm0, %ymm0
124-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
124+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
125125
; GFNIAVX512-NEXT: retq
126126
%res = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
127127
ret <32 x i8> %res
@@ -177,7 +177,7 @@ define <32 x i8> @splatconstant_rotr_v32i8(<32 x i8> %a) nounwind {
177177
; GFNIAVX512: # %bb.0:
178178
; GFNIAVX512-NEXT: vpsllw $2, %ymm0, %ymm1
179179
; GFNIAVX512-NEXT: vpsrlw $6, %ymm0, %ymm0
180-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
180+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
181181
; GFNIAVX512-NEXT: retq
182182
%res = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a, <32 x i8> %a, <32 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
183183
ret <32 x i8> %res
@@ -344,7 +344,7 @@ define <64 x i8> @splatconstant_rotr_v64i8(<64 x i8> %a) nounwind {
344344
; GFNIAVX512: # %bb.0:
345345
; GFNIAVX512-NEXT: vpsllw $6, %zmm0, %zmm1
346346
; GFNIAVX512-NEXT: vpsrlw $2, %zmm0, %zmm0
347-
; GFNIAVX512-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
347+
; GFNIAVX512-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
348348
; GFNIAVX512-NEXT: retq
349349
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a, <64 x i8> %a, <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
350350
ret <64 x i8> %res

llvm/test/CodeGen/X86/min-legal-vector-width.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,7 +2010,7 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind "min-legal-v
20102010
; CHECK: # %bb.0:
20112011
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1
20122012
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
2013-
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
2013+
; CHECK-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
20142014
; CHECK-NEXT: retq
20152015
%shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
20162016
%lshr = lshr <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -2023,7 +2023,7 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind "min-le
20232023
; CHECK: # %bb.0:
20242024
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm1
20252025
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm0
2026-
; CHECK-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
2026+
; CHECK-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
20272027
; CHECK-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
20282028
; CHECK-NEXT: retq
20292029
%shl = shl <32 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>

llvm/test/CodeGen/X86/vec_fcopysign.ll

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
159159
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
160160
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
161161
; X86-AVX512-NEXT: vmovdqa (%ecx), %xmm1
162-
; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
163-
; X86-AVX512-NEXT: vpternlogq $202, (%eax), %xmm1, %xmm0
162+
; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879]
163+
; X86-AVX512-NEXT: vpternlogd $202, (%eax), %xmm1, %xmm0
164164
; X86-AVX512-NEXT: retl
165165
;
166166
; X64-SSE-LABEL: fcopysign_v8f16:
@@ -193,8 +193,8 @@ define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
193193
; X64-AVX512-LABEL: fcopysign_v8f16:
194194
; X64-AVX512: # %bb.0:
195195
; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm1
196-
; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223231297218904063,9223231297218904063]
197-
; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %xmm1, %xmm0
196+
; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879]
197+
; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %xmm1, %xmm0
198198
; X64-AVX512-NEXT: retq
199199
%a0 = load <8 x half>, ptr %p0, align 16
200200
%a1 = load <8 x half>, ptr %p1, align 16
@@ -405,8 +405,8 @@ define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
405405
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
406406
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
407407
; X86-AVX512-NEXT: vmovdqu (%ecx), %ymm1
408-
; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
409-
; X86-AVX512-NEXT: vpternlogq $202, (%eax), %ymm1, %ymm0
408+
; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
409+
; X86-AVX512-NEXT: vpternlogd $202, (%eax), %ymm1, %ymm0
410410
; X86-AVX512-NEXT: retl
411411
;
412412
; X64-SSE-LABEL: fcopysign_v16f16:
@@ -444,8 +444,8 @@ define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
444444
; X64-AVX512-LABEL: fcopysign_v16f16:
445445
; X64-AVX512: # %bb.0:
446446
; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm1
447-
; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063]
448-
; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %ymm1, %ymm0
447+
; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
448+
; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %ymm1, %ymm0
449449
; X64-AVX512-NEXT: retq
450450
%a0 = load <16 x half>, ptr %p0, align 16
451451
%a1 = load <16 x half>, ptr %p1, align 16
@@ -691,34 +691,14 @@ define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
691691
; X86-AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
692692
; X86-AVX2-NEXT: retl
693693
;
694-
; X86-AVX512VL-LABEL: fcopysign_v32f16:
695-
; X86-AVX512VL: # %bb.0:
696-
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
697-
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx
698-
; X86-AVX512VL-NEXT: vmovdqu64 (%ecx), %zmm1
699-
; X86-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
700-
; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
701-
; X86-AVX512VL-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
702-
; X86-AVX512VL-NEXT: retl
703-
;
704-
; X86-AVX512FP16-LABEL: fcopysign_v32f16:
705-
; X86-AVX512FP16: # %bb.0:
706-
; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
707-
; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %ecx
708-
; X86-AVX512FP16-NEXT: vmovdqu64 (%ecx), %zmm1
709-
; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
710-
; X86-AVX512FP16-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
711-
; X86-AVX512FP16-NEXT: retl
712-
;
713-
; X86-AVX512VLDQ-LABEL: fcopysign_v32f16:
714-
; X86-AVX512VLDQ: # %bb.0:
715-
; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
716-
; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
717-
; X86-AVX512VLDQ-NEXT: vmovdqu64 (%ecx), %zmm1
718-
; X86-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
719-
; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
720-
; X86-AVX512VLDQ-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
721-
; X86-AVX512VLDQ-NEXT: retl
694+
; X86-AVX512-LABEL: fcopysign_v32f16:
695+
; X86-AVX512: # %bb.0:
696+
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
697+
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
698+
; X86-AVX512-NEXT: vmovdqu64 (%ecx), %zmm1
699+
; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
700+
; X86-AVX512-NEXT: vpternlogd $202, (%eax), %zmm1, %zmm0
701+
; X86-AVX512-NEXT: retl
722702
;
723703
; X64-SSE-LABEL: fcopysign_v32f16:
724704
; X64-SSE: # %bb.0:
@@ -769,8 +749,8 @@ define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
769749
; X64-AVX512-LABEL: fcopysign_v32f16:
770750
; X64-AVX512: # %bb.0:
771751
; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm1
772-
; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063]
773-
; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %zmm1, %zmm0
752+
; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
753+
; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %zmm1, %zmm0
774754
; X64-AVX512-NEXT: retq
775755
%a0 = load <32 x half>, ptr %p0, align 16
776756
%a1 = load <32 x half>, ptr %p1, align 16
@@ -786,3 +766,6 @@ declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
786766
; X64-AVX512VLDQ: {{.*}}
787767
; X86: {{.*}}
788768
; X86-AVX: {{.*}}
769+
; X86-AVX512FP16: {{.*}}
770+
; X86-AVX512VL: {{.*}}
771+
; X86-AVX512VLDQ: {{.*}}

llvm/test/CodeGen/X86/vector-fshl-128.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2409,7 +2409,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
24092409
; AVX512F: # %bb.0:
24102410
; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm2
24112411
; AVX512F-NEXT: vpsrlw $4, %xmm1, %xmm0
2412-
; AVX512F-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
2412+
; AVX512F-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
24132413
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
24142414
; AVX512F-NEXT: vzeroupper
24152415
; AVX512F-NEXT: retq
@@ -2418,14 +2418,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
24182418
; AVX512VL: # %bb.0:
24192419
; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
24202420
; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
2421-
; AVX512VL-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0
2421+
; AVX512VL-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm0
24222422
; AVX512VL-NEXT: retq
24232423
;
24242424
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
24252425
; AVX512BW: # %bb.0:
24262426
; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm2
24272427
; AVX512BW-NEXT: vpsrlw $4, %xmm1, %xmm0
2428-
; AVX512BW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
2428+
; AVX512BW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
24292429
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
24302430
; AVX512BW-NEXT: vzeroupper
24312431
; AVX512BW-NEXT: retq
@@ -2434,7 +2434,7 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
24342434
; AVX512VBMI2: # %bb.0:
24352435
; AVX512VBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
24362436
; AVX512VBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
2437-
; AVX512VBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm2, %zmm0
2437+
; AVX512VBMI2-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm2, %zmm0
24382438
; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
24392439
; AVX512VBMI2-NEXT: vzeroupper
24402440
; AVX512VBMI2-NEXT: retq
@@ -2443,14 +2443,14 @@ define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwi
24432443
; AVX512VLBW: # %bb.0:
24442444
; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
24452445
; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
2446-
; AVX512VLBW-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0
2446+
; AVX512VLBW-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm0
24472447
; AVX512VLBW-NEXT: retq
24482448
;
24492449
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
24502450
; AVX512VLVBMI2: # %bb.0:
24512451
; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
24522452
; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
2453-
; AVX512VLVBMI2-NEXT: vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm2, %xmm0
2453+
; AVX512VLVBMI2-NEXT: vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm0
24542454
; AVX512VLVBMI2-NEXT: retq
24552455
;
24562456
; XOP-LABEL: splatconstant_funnnel_v16i8:

0 commit comments

Comments
 (0)