[SelectionDAG] Use SimplifyDemandedBits from SimplifyDemandedVectorElts Bitcast. #133717
Conversation
[SelectionDAG] Use SimplifyDemandedBits from SimplifyDemandedVectorElts Bitcast. This adds a call to SimplifyDemandedBits from bitcasts with scalar input types in SimplifyDemandedVectorElts, which can help simplify the input scalar.
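As an illustration, here is a minimal, hypothetical IR snippet (not taken from the patch; names are made up) of the kind of pattern this helps with: only lane 0 of the bitcast result is demanded, so on a little-endian target the new SimplifyDemandedBits call can see that the high bits produced by the sext are never read and simplify the extension away.

; Hypothetical example, assuming a little-endian target: lane 0 of the
; <2 x i32> maps to the low 32 bits of %conv, so only those bits of the
; scalar input are demanded and the sext can be simplified.
define i32 @low_lane_only(i32 %x) {
  %conv = sext i32 %x to i64
  %vec = bitcast i64 %conv to <2 x i32>
  %lo = extractelement <2 x i32> %vec, i64 0
  ret i32 %lo
}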
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes: This adds a call to SimplifyDemandedBits from bitcasts with scalar input types in SimplifyDemandedVectorElts, which can help simplify the input scalar.

Full diff: https://github.com/llvm/llvm-project/pull/133717.diff

6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 10006a9d76785..c249929d35d5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3163,10 +3163,22 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
- // We only handle vectors here.
- // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
- if (!SrcVT.isVector())
+ if (!SrcVT.isVector()) {
+ // TODO - bigendian once we have test coverage.
+ if (IsLE) {
+ APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = i * VT.getScalarSizeInBits();
+ DemandedSrcBits.insertBits(
+ APInt::getAllOnes(VT.getScalarSizeInBits()), Offset);
+ }
+ KnownBits Known;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
+ return true;
+ }
break;
+ }
// Fast handling of 'identity' bitcasts.
unsigned NumSrcElts = SrcVT.getVectorNumElements();
diff --git a/llvm/test/CodeGen/AArch64/bitcast-extend.ll b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
index 195c740022d10..85daa3ca6623e 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-extend.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-extend.ll
@@ -217,17 +217,28 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
}
define void @extractbitcastext(i32 %bytes, ptr %output) {
-; CHECK-LABEL: extractbitcastext:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ushll v1.2d, v0.2s, #0
-; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-NEXT: stp q1, q0, [x1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: extractbitcastext:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT: stp q1, q0, [x1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extractbitcastext:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: sxtw x8, w0
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
+; CHECK-GI-NEXT: stp q1, q0, [x1]
+; CHECK-GI-NEXT: ret
%conv = sext i32 %bytes to i64
%b0 = bitcast i64 %conv to <8 x i8>
%b1 = zext <8 x i8> %b0 to <8 x i16>
@@ -244,17 +255,28 @@ define void @extractbitcastext(i32 %bytes, ptr %output) {
}
define void @extractbitcastext_s(i32 %bytes, ptr %output) {
-; CHECK-LABEL: extractbitcastext_s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sshll v1.2d, v0.2s, #0
-; CHECK-NEXT: sshll2 v0.2d, v0.4s, #0
-; CHECK-NEXT: stp q1, q0, [x1]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: extractbitcastext_s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT: fmov d0, x0
+; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-SD-NEXT: sshll2 v0.2d, v0.4s, #0
+; CHECK-SD-NEXT: stp q1, q0, [x1]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: extractbitcastext_s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT: sxtw x8, w0
+; CHECK-GI-NEXT: fmov d0, x8
+; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: sshll2 v0.2d, v0.4s, #0
+; CHECK-GI-NEXT: stp q1, q0, [x1]
+; CHECK-GI-NEXT: ret
%conv = sext i32 %bytes to i64
%b0 = bitcast i64 %conv to <8 x i8>
%b1 = sext <8 x i8> %b0 to <8 x i16>
@@ -271,3 +293,5 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) {
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/Thumb2/mve-vdup.ll b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
index 9ba3866ad4730..77fa9f297e678 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vdup.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vdup.ll
@@ -371,7 +371,7 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: strd r0, r1, [sp]
+; CHECK-LE-NEXT: str r0, [sp]
; CHECK-LE-NEXT: mov r0, sp
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
; CHECK-LE-NEXT: vmov r0, s0
@@ -420,7 +420,7 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
-; CHECK-LE-NEXT: strd r0, r1, [sp]
+; CHECK-LE-NEXT: str r0, [sp]
; CHECK-LE-NEXT: mov r0, sp
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
; CHECK-LE-NEXT: vmov r0, s1
diff --git a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
index 1f539f1652004..4eca61d08af7f 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
@@ -27,8 +27,8 @@ define <2 x i2> @i2x2_splat(i1 %x) {
; CHECK-LABEL: not_a_vec:
; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}}
-; CHECK-NEXT: i32.wrap_i64 $push[[L:[0-9]+]]=, $0
-; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
+; CHECK-NEXT: i64x2.splat $push[[L:[0-9]+]]=, $0
+; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L]], $2, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
; CHECK-NEXT: return $pop[[R]]
define <4 x i32> @not_a_vec(i128 %x) {
%a = bitcast i128 %x to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/kmov.ll b/llvm/test/CodeGen/X86/kmov.ll
index 55fb2527722a4..5e31baa1ec72f 100644
--- a/llvm/test/CodeGen/X86/kmov.ll
+++ b/llvm/test/CodeGen/X86/kmov.ll
@@ -386,36 +386,28 @@ define <32 x i1> @invert_i32_mask_extract_32(i32 %mask) {
define <32 x i1> @i64_mask_extract_32(i64 %mask) {
; X64-AVX512-LABEL: i64_mask_extract_32:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: movq %rdi, %rax
-; X64-AVX512-NEXT: kmovd %eax, %k0
-; X64-AVX512-NEXT: movzbl %ah, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k1
-; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
-; X64-AVX512-NEXT: movl %eax, %ecx
-; X64-AVX512-NEXT: shrl $24, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k1
-; X64-AVX512-NEXT: shrl $16, %eax
-; X64-AVX512-NEXT: movzbl %al, %eax
-; X64-AVX512-NEXT: kmovd %eax, %k2
-; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
-; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
+; X64-AVX512-NEXT: kmovq %rdi, %k0
+; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
+; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
+; X64-AVX512-NEXT: kshiftrd $16, %k0, %k2
+; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
+; X64-AVX512-NEXT: kunpckbw %k2, %k0, %k0
+; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
; X64-AVX512-NEXT: retq
;
; X64-KNL-LABEL: i64_mask_extract_32:
; X64-KNL: # %bb.0:
-; X64-KNL-NEXT: movq %rdi, %rax
-; X64-KNL-NEXT: movl %eax, %ecx
+; X64-KNL-NEXT: movl %edi, %eax
+; X64-KNL-NEXT: shrl $16, %eax
; X64-KNL-NEXT: kmovw %eax, %k0
-; X64-KNL-NEXT: movzbl %ah, %edx
-; X64-KNL-NEXT: # kill: def $eax killed $eax killed $rax
+; X64-KNL-NEXT: movl %edi, %eax
; X64-KNL-NEXT: shrl $24, %eax
; X64-KNL-NEXT: kmovw %eax, %k1
-; X64-KNL-NEXT: shrl $16, %ecx
-; X64-KNL-NEXT: movzbl %cl, %eax
-; X64-KNL-NEXT: kmovw %eax, %k2
-; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
-; X64-KNL-NEXT: kmovw %edx, %k2
+; X64-KNL-NEXT: kunpckbw %k0, %k1, %k1
+; X64-KNL-NEXT: kmovw %edi, %k0
+; X64-KNL-NEXT: shrl $8, %edi
+; X64-KNL-NEXT: kmovw %edi, %k2
; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
@@ -480,82 +472,56 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
define <64 x i1> @i64_mask_extract_64(i64 %mask) {
; X64-AVX512-LABEL: i64_mask_extract_64:
; X64-AVX512: # %bb.0:
-; X64-AVX512-NEXT: movq %rdi, %rax
-; X64-AVX512-NEXT: kmovd %eax, %k0
-; X64-AVX512-NEXT: movzbl %ah, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k1
-; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
-; X64-AVX512-NEXT: movl %eax, %ecx
-; X64-AVX512-NEXT: shrl $24, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k1
-; X64-AVX512-NEXT: movl %eax, %ecx
-; X64-AVX512-NEXT: shrl $16, %ecx
-; X64-AVX512-NEXT: movzbl %cl, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k2
-; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
-; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
-; X64-AVX512-NEXT: movq %rdi, %rcx
-; X64-AVX512-NEXT: shrq $32, %rcx
-; X64-AVX512-NEXT: movzbl %cl, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k1
-; X64-AVX512-NEXT: movq %rdi, %rcx
-; X64-AVX512-NEXT: shrq $40, %rcx
-; X64-AVX512-NEXT: movzbl %cl, %ecx
-; X64-AVX512-NEXT: kmovd %ecx, %k2
+; X64-AVX512-NEXT: kmovq %rdi, %k0
+; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
+; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
-; X64-AVX512-NEXT: movq %rdi, %rcx
-; X64-AVX512-NEXT: shrq $56, %rcx
-; X64-AVX512-NEXT: kmovd %ecx, %k2
-; X64-AVX512-NEXT: shrq $48, %rax
-; X64-AVX512-NEXT: movzbl %al, %eax
-; X64-AVX512-NEXT: kmovd %eax, %k3
-; X64-AVX512-NEXT: kunpckbw %k3, %k2, %k2
+; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
+; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
+; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
+; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
+; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
+; X64-AVX512-NEXT: kshiftrd $16, %k0, %k3
+; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
+; X64-AVX512-NEXT: kunpckbw %k3, %k0, %k0
+; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
; X64-AVX512-NEXT: retq
;
; X64-KNL-LABEL: i64_mask_extract_64:
; X64-KNL: # %bb.0:
-; X64-KNL-NEXT: pushq %rbx
-; X64-KNL-NEXT: .cfi_def_cfa_offset 16
-; X64-KNL-NEXT: .cfi_offset %rbx, -16
-; X64-KNL-NEXT: movq %rsi, %rcx
; X64-KNL-NEXT: movq %rdi, %rax
-; X64-KNL-NEXT: movl %ecx, %edx
-; X64-KNL-NEXT: movq %rsi, %rdi
-; X64-KNL-NEXT: movq %rsi, %r8
-; X64-KNL-NEXT: movq %rsi, %r9
-; X64-KNL-NEXT: kmovw %ecx, %k0
-; X64-KNL-NEXT: movzbl %ch, %ebx
-; X64-KNL-NEXT: # kill: def $ecx killed $ecx killed $rcx
-; X64-KNL-NEXT: shrl $24, %ecx
+; X64-KNL-NEXT: kmovw %esi, %k0
+; X64-KNL-NEXT: movl %esi, %ecx
+; X64-KNL-NEXT: shrl $8, %ecx
+; X64-KNL-NEXT: kmovw %ecx, %k1
+; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
+; X64-KNL-NEXT: movl %esi, %ecx
+; X64-KNL-NEXT: shrl $16, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k1
-; X64-KNL-NEXT: shrl $16, %edx
-; X64-KNL-NEXT: movzbl %dl, %ecx
+; X64-KNL-NEXT: movl %esi, %ecx
+; X64-KNL-NEXT: shrl $24, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k2
-; X64-KNL-NEXT: shrq $32, %rsi
-; X64-KNL-NEXT: movzbl %sil, %ecx
+; X64-KNL-NEXT: kunpckbw %k1, %k2, %k1
+; X64-KNL-NEXT: movq %rsi, %rcx
+; X64-KNL-NEXT: shrq $32, %rcx
+; X64-KNL-NEXT: kmovw %ecx, %k2
+; X64-KNL-NEXT: movq %rsi, %rcx
+; X64-KNL-NEXT: shrq $40, %rcx
+; X64-KNL-NEXT: kmovw %ecx, %k3
+; X64-KNL-NEXT: kunpckbw %k2, %k3, %k2
+; X64-KNL-NEXT: movq %rsi, %rcx
+; X64-KNL-NEXT: shrq $48, %rcx
; X64-KNL-NEXT: kmovw %ecx, %k3
-; X64-KNL-NEXT: shrq $40, %rdi
-; X64-KNL-NEXT: movzbl %dil, %ecx
-; X64-KNL-NEXT: kmovw %ecx, %k4
-; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
-; X64-KNL-NEXT: shrq $56, %r8
-; X64-KNL-NEXT: kmovw %r8d, %k2
+; X64-KNL-NEXT: shrq $56, %rsi
+; X64-KNL-NEXT: kmovw %esi, %k4
; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
-; X64-KNL-NEXT: shrq $48, %r9
-; X64-KNL-NEXT: movzbl %r9b, %ecx
-; X64-KNL-NEXT: kmovw %ecx, %k4
-; X64-KNL-NEXT: kunpckbw %k4, %k2, %k2
-; X64-KNL-NEXT: kmovw %ebx, %k4
-; X64-KNL-NEXT: kunpckbw %k0, %k4, %k0
-; X64-KNL-NEXT: kmovw %k0, (%rax)
-; X64-KNL-NEXT: kmovw %k2, 6(%rax)
-; X64-KNL-NEXT: kmovw %k3, 4(%rax)
-; X64-KNL-NEXT: kmovw %k1, 2(%rax)
-; X64-KNL-NEXT: popq %rbx
-; X64-KNL-NEXT: .cfi_def_cfa_offset 8
+; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
+; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
+; X64-KNL-NEXT: kmovw %k1, 2(%rdi)
+; X64-KNL-NEXT: kmovw %k0, (%rdi)
; X64-KNL-NEXT: retq
%.splatinsert = insertelement <64 x i64> poison, i64 %mask, i64 0
%.splat = shufflevector <64 x i64> %.splatinsert, <64 x i64> poison, <64 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
index f0f430abc48dc..060bd1764d3c4 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
@@ -417,7 +417,6 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
; AVX512F-NEXT: seta %al
-; AVX512F-NEXT: negb %al
; AVX512F-NEXT: kmovd %eax, %k1
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
@llvm/pr-subscribers-backend-webassembly

Author: David Green (davemgreen)

Changes: This adds a call to SimplifyDemandedBits from bitcasts with scalar input types in SimplifyDemandedVectorElts, which can help simplify the input scalar.

Full diff: https://github.com/llvm/llvm-project/pull/133717.diff
LGTM
-; CHECK-NEXT: i32.wrap_i64 $push[[L:[0-9]+]]=, $0
-; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
+; CHECK-NEXT: i64x2.splat $push[[L:[0-9]+]]=, $0
+; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L]], $2, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
@tlively does this break the behavior that this test was intended to check, or is there an issue with the test?
Looks like the test was originally added to show that an assertion was not hit, so it's still correctly showing that after this change. The actual codegen change looks like a regression, but I don't think the test is representative of common patterns, so that's probably fine. LGTM to land the patch from the Wasm point of view.
LGTM if the WebAssembly test change is OK.
This adds a call to SimplifyDemandedBits from bitcasts with scalar input types in SimplifyDemandedVectorElts, which can help simplify the input scalar.