Skip to content

[SelectionDAG] Use SimplifyDemandedBits from SimplifyDemandedVectorElts Bitcast. #133717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3163,10 +3163,23 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();

// We only handle vectors here.
// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
if (!SrcVT.isVector())
if (!SrcVT.isVector()) {
// TODO - handle big-endian once we have test coverage.
if (IsLE) {
APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
unsigned EltSize = VT.getScalarSizeInBits();
for (unsigned I = 0; I != NumElts; ++I) {
if (DemandedElts[I]) {
unsigned Offset = I * EltSize;
DemandedSrcBits.setBits(Offset, Offset + EltSize);
}
}
KnownBits Known;
if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
return true;
}
break;
}

// Fast handling of 'identity' bitcasts.
unsigned NumSrcElts = SrcVT.getVectorNumElements();
Expand Down
68 changes: 46 additions & 22 deletions llvm/test/CodeGen/AArch64/bitcast-extend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,28 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
}

define void @extractbitcastext(i32 %bytes, ptr %output) {
; CHECK-LABEL: extractbitcastext:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ushll v1.2d, v0.2s, #0
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
; CHECK-SD-LABEL: extractbitcastext:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: fmov d0, x0
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: ushll v1.2d, v0.2s, #0
; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-SD-NEXT: stp q1, q0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: extractbitcastext:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
; CHECK-GI-NEXT: stp q1, q0, [x1]
; CHECK-GI-NEXT: ret
%conv = sext i32 %bytes to i64
%b0 = bitcast i64 %conv to <8 x i8>
%b1 = zext <8 x i8> %b0 to <8 x i16>
Expand All @@ -244,17 +255,28 @@ define void @extractbitcastext(i32 %bytes, ptr %output) {
}

define void @extractbitcastext_s(i32 %bytes, ptr %output) {
; CHECK-LABEL: extractbitcastext_s:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x8, w0
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: sshll v1.2d, v0.2s, #0
; CHECK-NEXT: sshll2 v0.2d, v0.4s, #0
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
; CHECK-SD-LABEL: extractbitcastext_s:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-SD-NEXT: fmov d0, x0
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: sshll v1.2d, v0.2s, #0
; CHECK-SD-NEXT: sshll2 v0.2d, v0.4s, #0
; CHECK-SD-NEXT: stp q1, q0, [x1]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: extractbitcastext_s:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT: sxtw x8, w0
; CHECK-GI-NEXT: fmov d0, x8
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
; CHECK-GI-NEXT: sshll2 v0.2d, v0.4s, #0
; CHECK-GI-NEXT: stp q1, q0, [x1]
; CHECK-GI-NEXT: ret
%conv = sext i32 %bytes to i64
%b0 = bitcast i64 %conv to <8 x i8>
%b1 = sext <8 x i8> %b0 to <8 x i16>
Expand All @@ -271,3 +293,5 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) {
}


;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-vdup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: strd r0, r1, [sp]
; CHECK-LE-NEXT: str r0, [sp]
; CHECK-LE-NEXT: mov r0, sp
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
; CHECK-LE-NEXT: vmov r0, s0
Expand Down Expand Up @@ -420,7 +420,7 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: strd r0, r1, [sp]
; CHECK-LE-NEXT: str r0, [sp]
; CHECK-LE-NEXT: mov r0, sp
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
; CHECK-LE-NEXT: vmov r0, s1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ define <2 x i2> @i2x2_splat(i1 %x) {

; CHECK-LABEL: not_a_vec:
; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}}
; CHECK-NEXT: i32.wrap_i64 $push[[L:[0-9]+]]=, $0
; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
; CHECK-NEXT: i64x2.splat $push[[L:[0-9]+]]=, $0
; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L]], $2, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tlively does this break the behavior that this test was intended to check, or is there an issue with the test?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the test was originally added to show that an assertion was not hit, so it's still correctly showing that after this change. The actual codegen change looks like a regression, but I don't think the test is representative of common patterns, so that's probably fine. LGTM to land the patch from the Wasm point of view.

; CHECK-NEXT: return $pop[[R]]
define <4 x i32> @not_a_vec(i128 %x) {
%a = bitcast i128 %x to <4 x i32>
Expand Down
136 changes: 51 additions & 85 deletions llvm/test/CodeGen/X86/kmov.ll
Original file line number Diff line number Diff line change
Expand Up @@ -386,36 +386,28 @@ define <32 x i1> @invert_i32_mask_extract_32(i32 %mask) {
define <32 x i1> @i64_mask_extract_32(i64 %mask) {
; X64-AVX512-LABEL: i64_mask_extract_32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: movq %rdi, %rax
; X64-AVX512-NEXT: kmovd %eax, %k0
; X64-AVX512-NEXT: movzbl %ah, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k1
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
; X64-AVX512-NEXT: movl %eax, %ecx
; X64-AVX512-NEXT: shrl $24, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k1
; X64-AVX512-NEXT: shrl $16, %eax
; X64-AVX512-NEXT: movzbl %al, %eax
; X64-AVX512-NEXT: kmovd %eax, %k2
; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
; X64-AVX512-NEXT: kmovq %rdi, %k0
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
; X64-AVX512-NEXT: kshiftrd $16, %k0, %k2
; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
; X64-AVX512-NEXT: kunpckbw %k2, %k0, %k0
; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
; X64-AVX512-NEXT: retq
;
; X64-KNL-LABEL: i64_mask_extract_32:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: movq %rdi, %rax
; X64-KNL-NEXT: movl %eax, %ecx
; X64-KNL-NEXT: movl %edi, %eax
; X64-KNL-NEXT: shrl $16, %eax
; X64-KNL-NEXT: kmovw %eax, %k0
; X64-KNL-NEXT: movzbl %ah, %edx
; X64-KNL-NEXT: # kill: def $eax killed $eax killed $rax
; X64-KNL-NEXT: movl %edi, %eax
; X64-KNL-NEXT: shrl $24, %eax
; X64-KNL-NEXT: kmovw %eax, %k1
; X64-KNL-NEXT: shrl $16, %ecx
; X64-KNL-NEXT: movzbl %cl, %eax
; X64-KNL-NEXT: kmovw %eax, %k2
; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
; X64-KNL-NEXT: kmovw %edx, %k2
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k1
; X64-KNL-NEXT: kmovw %edi, %k0
; X64-KNL-NEXT: shrl $8, %edi
; X64-KNL-NEXT: kmovw %edi, %k2
; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
Expand Down Expand Up @@ -480,82 +472,56 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
define <64 x i1> @i64_mask_extract_64(i64 %mask) {
; X64-AVX512-LABEL: i64_mask_extract_64:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: movq %rdi, %rax
; X64-AVX512-NEXT: kmovd %eax, %k0
; X64-AVX512-NEXT: movzbl %ah, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k1
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
; X64-AVX512-NEXT: movl %eax, %ecx
; X64-AVX512-NEXT: shrl $24, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k1
; X64-AVX512-NEXT: movl %eax, %ecx
; X64-AVX512-NEXT: shrl $16, %ecx
; X64-AVX512-NEXT: movzbl %cl, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k2
; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
; X64-AVX512-NEXT: movq %rdi, %rcx
; X64-AVX512-NEXT: shrq $32, %rcx
; X64-AVX512-NEXT: movzbl %cl, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k1
; X64-AVX512-NEXT: movq %rdi, %rcx
; X64-AVX512-NEXT: shrq $40, %rcx
; X64-AVX512-NEXT: movzbl %cl, %ecx
; X64-AVX512-NEXT: kmovd %ecx, %k2
; X64-AVX512-NEXT: kmovq %rdi, %k0
; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
; X64-AVX512-NEXT: movq %rdi, %rcx
; X64-AVX512-NEXT: shrq $56, %rcx
; X64-AVX512-NEXT: kmovd %ecx, %k2
; X64-AVX512-NEXT: shrq $48, %rax
; X64-AVX512-NEXT: movzbl %al, %eax
; X64-AVX512-NEXT: kmovd %eax, %k3
; X64-AVX512-NEXT: kunpckbw %k3, %k2, %k2
; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
; X64-AVX512-NEXT: kshiftrd $16, %k0, %k3
; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
; X64-AVX512-NEXT: kunpckbw %k3, %k0, %k0
; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
; X64-AVX512-NEXT: retq
;
; X64-KNL-LABEL: i64_mask_extract_64:
; X64-KNL: # %bb.0:
; X64-KNL-NEXT: pushq %rbx
; X64-KNL-NEXT: .cfi_def_cfa_offset 16
; X64-KNL-NEXT: .cfi_offset %rbx, -16
; X64-KNL-NEXT: movq %rsi, %rcx
; X64-KNL-NEXT: movq %rdi, %rax
; X64-KNL-NEXT: movl %ecx, %edx
; X64-KNL-NEXT: movq %rsi, %rdi
; X64-KNL-NEXT: movq %rsi, %r8
; X64-KNL-NEXT: movq %rsi, %r9
; X64-KNL-NEXT: kmovw %ecx, %k0
; X64-KNL-NEXT: movzbl %ch, %ebx
; X64-KNL-NEXT: # kill: def $ecx killed $ecx killed $rcx
; X64-KNL-NEXT: shrl $24, %ecx
; X64-KNL-NEXT: kmovw %esi, %k0
; X64-KNL-NEXT: movl %esi, %ecx
; X64-KNL-NEXT: shrl $8, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k1
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
; X64-KNL-NEXT: movl %esi, %ecx
; X64-KNL-NEXT: shrl $16, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k1
; X64-KNL-NEXT: shrl $16, %edx
; X64-KNL-NEXT: movzbl %dl, %ecx
; X64-KNL-NEXT: movl %esi, %ecx
; X64-KNL-NEXT: shrl $24, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k2
; X64-KNL-NEXT: shrq $32, %rsi
; X64-KNL-NEXT: movzbl %sil, %ecx
; X64-KNL-NEXT: kunpckbw %k1, %k2, %k1
; X64-KNL-NEXT: movq %rsi, %rcx
; X64-KNL-NEXT: shrq $32, %rcx
; X64-KNL-NEXT: kmovw %ecx, %k2
; X64-KNL-NEXT: movq %rsi, %rcx
; X64-KNL-NEXT: shrq $40, %rcx
; X64-KNL-NEXT: kmovw %ecx, %k3
; X64-KNL-NEXT: kunpckbw %k2, %k3, %k2
; X64-KNL-NEXT: movq %rsi, %rcx
; X64-KNL-NEXT: shrq $48, %rcx
; X64-KNL-NEXT: kmovw %ecx, %k3
; X64-KNL-NEXT: shrq $40, %rdi
; X64-KNL-NEXT: movzbl %dil, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k4
; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
; X64-KNL-NEXT: shrq $56, %r8
; X64-KNL-NEXT: kmovw %r8d, %k2
; X64-KNL-NEXT: shrq $56, %rsi
; X64-KNL-NEXT: kmovw %esi, %k4
; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
; X64-KNL-NEXT: shrq $48, %r9
; X64-KNL-NEXT: movzbl %r9b, %ecx
; X64-KNL-NEXT: kmovw %ecx, %k4
; X64-KNL-NEXT: kunpckbw %k4, %k2, %k2
; X64-KNL-NEXT: kmovw %ebx, %k4
; X64-KNL-NEXT: kunpckbw %k0, %k4, %k0
; X64-KNL-NEXT: kmovw %k0, (%rax)
; X64-KNL-NEXT: kmovw %k2, 6(%rax)
; X64-KNL-NEXT: kmovw %k3, 4(%rax)
; X64-KNL-NEXT: kmovw %k1, 2(%rax)
; X64-KNL-NEXT: popq %rbx
; X64-KNL-NEXT: .cfi_def_cfa_offset 8
; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
; X64-KNL-NEXT: kmovw %k1, 2(%rdi)
; X64-KNL-NEXT: kmovw %k0, (%rdi)
; X64-KNL-NEXT: retq
%.splatinsert = insertelement <64 x i64> poison, i64 %mask, i64 0
%.splat = shufflevector <64 x i64> %.splatinsert, <64 x i64> poison, <64 x i32> zeroinitializer
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,6 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
; AVX512F-NEXT: seta %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: kmovd %eax, %k1
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
Expand Down