Skip to content

Commit 6c27817

Browse files
authored
[SelectionDAG] Use SimplifyDemandedBits from SimplifyDemandedVectorElts Bitcast. (#133717)
This adds a call to SimplifyDemandedBits from bitcasts with scalar input types in SimplifyDemandedVectorElts, which can help simplify the input scalar.
1 parent 554f4d1 commit 6c27817

File tree

6 files changed

+117
-115
lines changed

6 files changed

+117
-115
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3163,10 +3163,23 @@ bool TargetLowering::SimplifyDemandedVectorElts(
31633163
SDValue Src = Op.getOperand(0);
31643164
EVT SrcVT = Src.getValueType();
31653165

3166-
// We only handle vectors here.
3167-
// TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3168-
if (!SrcVT.isVector())
3166+
if (!SrcVT.isVector()) {
3167+
// TODO - bigendian once we have test coverage.
3168+
if (IsLE) {
3169+
APInt DemandedSrcBits = APInt::getZero(SrcVT.getSizeInBits());
3170+
unsigned EltSize = VT.getScalarSizeInBits();
3171+
for (unsigned I = 0; I != NumElts; ++I) {
3172+
if (DemandedElts[I]) {
3173+
unsigned Offset = I * EltSize;
3174+
DemandedSrcBits.setBits(Offset, Offset + EltSize);
3175+
}
3176+
}
3177+
KnownBits Known;
3178+
if (SimplifyDemandedBits(Src, DemandedSrcBits, Known, TLO, Depth + 1))
3179+
return true;
3180+
}
31693181
break;
3182+
}
31703183

31713184
// Fast handling of 'identity' bitcasts.
31723185
unsigned NumSrcElts = SrcVT.getVectorNumElements();

llvm/test/CodeGen/AArch64/bitcast-extend.ll

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -217,17 +217,28 @@ define <4 x i64> @s_i32_v4i64(i32 %x) {
217217
}
218218

219219
define void @extractbitcastext(i32 %bytes, ptr %output) {
220-
; CHECK-LABEL: extractbitcastext:
221-
; CHECK: // %bb.0:
222-
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
223-
; CHECK-NEXT: sxtw x8, w0
224-
; CHECK-NEXT: fmov d0, x8
225-
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
226-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
227-
; CHECK-NEXT: ushll v1.2d, v0.2s, #0
228-
; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
229-
; CHECK-NEXT: stp q1, q0, [x1]
230-
; CHECK-NEXT: ret
220+
; CHECK-SD-LABEL: extractbitcastext:
221+
; CHECK-SD: // %bb.0:
222+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
223+
; CHECK-SD-NEXT: fmov d0, x0
224+
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
225+
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
226+
; CHECK-SD-NEXT: ushll v1.2d, v0.2s, #0
227+
; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0
228+
; CHECK-SD-NEXT: stp q1, q0, [x1]
229+
; CHECK-SD-NEXT: ret
230+
;
231+
; CHECK-GI-LABEL: extractbitcastext:
232+
; CHECK-GI: // %bb.0:
233+
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
234+
; CHECK-GI-NEXT: sxtw x8, w0
235+
; CHECK-GI-NEXT: fmov d0, x8
236+
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
237+
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
238+
; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
239+
; CHECK-GI-NEXT: ushll2 v0.2d, v0.4s, #0
240+
; CHECK-GI-NEXT: stp q1, q0, [x1]
241+
; CHECK-GI-NEXT: ret
231242
%conv = sext i32 %bytes to i64
232243
%b0 = bitcast i64 %conv to <8 x i8>
233244
%b1 = zext <8 x i8> %b0 to <8 x i16>
@@ -244,17 +255,28 @@ define void @extractbitcastext(i32 %bytes, ptr %output) {
244255
}
245256

246257
define void @extractbitcastext_s(i32 %bytes, ptr %output) {
247-
; CHECK-LABEL: extractbitcastext_s:
248-
; CHECK: // %bb.0:
249-
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
250-
; CHECK-NEXT: sxtw x8, w0
251-
; CHECK-NEXT: fmov d0, x8
252-
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
253-
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
254-
; CHECK-NEXT: sshll v1.2d, v0.2s, #0
255-
; CHECK-NEXT: sshll2 v0.2d, v0.4s, #0
256-
; CHECK-NEXT: stp q1, q0, [x1]
257-
; CHECK-NEXT: ret
258+
; CHECK-SD-LABEL: extractbitcastext_s:
259+
; CHECK-SD: // %bb.0:
260+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0
261+
; CHECK-SD-NEXT: fmov d0, x0
262+
; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0
263+
; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
264+
; CHECK-SD-NEXT: sshll v1.2d, v0.2s, #0
265+
; CHECK-SD-NEXT: sshll2 v0.2d, v0.4s, #0
266+
; CHECK-SD-NEXT: stp q1, q0, [x1]
267+
; CHECK-SD-NEXT: ret
268+
;
269+
; CHECK-GI-LABEL: extractbitcastext_s:
270+
; CHECK-GI: // %bb.0:
271+
; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0
272+
; CHECK-GI-NEXT: sxtw x8, w0
273+
; CHECK-GI-NEXT: fmov d0, x8
274+
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
275+
; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
276+
; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0
277+
; CHECK-GI-NEXT: sshll2 v0.2d, v0.4s, #0
278+
; CHECK-GI-NEXT: stp q1, q0, [x1]
279+
; CHECK-GI-NEXT: ret
258280
%conv = sext i32 %bytes to i64
259281
%b0 = bitcast i64 %conv to <8 x i8>
260282
%b1 = sext <8 x i8> %b0 to <8 x i16>
@@ -271,3 +293,5 @@ define void @extractbitcastext_s(i32 %bytes, ptr %output) {
271293
}
272294

273295

296+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
297+
; CHECK: {{.*}}

llvm/test/CodeGen/Thumb2/mve-vdup.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
371371
; CHECK-LE: @ %bb.0:
372372
; CHECK-LE-NEXT: .pad #8
373373
; CHECK-LE-NEXT: sub sp, #8
374-
; CHECK-LE-NEXT: strd r0, r1, [sp]
374+
; CHECK-LE-NEXT: str r0, [sp]
375375
; CHECK-LE-NEXT: mov r0, sp
376376
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
377377
; CHECK-LE-NEXT: vmov r0, s0
@@ -420,7 +420,7 @@ define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
420420
; CHECK-LE: @ %bb.0:
421421
; CHECK-LE-NEXT: .pad #8
422422
; CHECK-LE-NEXT: sub sp, #8
423-
; CHECK-LE-NEXT: strd r0, r1, [sp]
423+
; CHECK-LE-NEXT: str r0, [sp]
424424
; CHECK-LE-NEXT: mov r0, sp
425425
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
426426
; CHECK-LE-NEXT: vmov r0, s1

llvm/test/CodeGen/WebAssembly/simd-shuffle-bitcast.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ define <2 x i2> @i2x2_splat(i1 %x) {
2727

2828
; CHECK-LABEL: not_a_vec:
2929
; CHECK-NEXT: .functype not_a_vec (i64, i64) -> (v128){{$}}
30-
; CHECK-NEXT: i32.wrap_i64 $push[[L:[0-9]+]]=, $0
31-
; CHECK-NEXT: i32x4.splat $push[[R:[0-9]+]]=, $pop[[L]]
30+
; CHECK-NEXT: i64x2.splat $push[[L:[0-9]+]]=, $0
31+
; CHECK-NEXT: i8x16.shuffle $push[[R:[0-9]+]]=, $pop[[L]], $2, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
3232
; CHECK-NEXT: return $pop[[R]]
3333
define <4 x i32> @not_a_vec(i128 %x) {
3434
%a = bitcast i128 %x to <4 x i32>

llvm/test/CodeGen/X86/kmov.ll

Lines changed: 51 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -386,36 +386,28 @@ define <32 x i1> @invert_i32_mask_extract_32(i32 %mask) {
386386
define <32 x i1> @i64_mask_extract_32(i64 %mask) {
387387
; X64-AVX512-LABEL: i64_mask_extract_32:
388388
; X64-AVX512: # %bb.0:
389-
; X64-AVX512-NEXT: movq %rdi, %rax
390-
; X64-AVX512-NEXT: kmovd %eax, %k0
391-
; X64-AVX512-NEXT: movzbl %ah, %ecx
392-
; X64-AVX512-NEXT: kmovd %ecx, %k1
393-
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
394-
; X64-AVX512-NEXT: movl %eax, %ecx
395-
; X64-AVX512-NEXT: shrl $24, %ecx
396-
; X64-AVX512-NEXT: kmovd %ecx, %k1
397-
; X64-AVX512-NEXT: shrl $16, %eax
398-
; X64-AVX512-NEXT: movzbl %al, %eax
399-
; X64-AVX512-NEXT: kmovd %eax, %k2
400-
; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
401-
; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
389+
; X64-AVX512-NEXT: kmovq %rdi, %k0
390+
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
391+
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
392+
; X64-AVX512-NEXT: kshiftrd $16, %k0, %k2
393+
; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
394+
; X64-AVX512-NEXT: kunpckbw %k2, %k0, %k0
395+
; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
402396
; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
403397
; X64-AVX512-NEXT: retq
404398
;
405399
; X64-KNL-LABEL: i64_mask_extract_32:
406400
; X64-KNL: # %bb.0:
407-
; X64-KNL-NEXT: movq %rdi, %rax
408-
; X64-KNL-NEXT: movl %eax, %ecx
401+
; X64-KNL-NEXT: movl %edi, %eax
402+
; X64-KNL-NEXT: shrl $16, %eax
409403
; X64-KNL-NEXT: kmovw %eax, %k0
410-
; X64-KNL-NEXT: movzbl %ah, %edx
411-
; X64-KNL-NEXT: # kill: def $eax killed $eax killed $rax
404+
; X64-KNL-NEXT: movl %edi, %eax
412405
; X64-KNL-NEXT: shrl $24, %eax
413406
; X64-KNL-NEXT: kmovw %eax, %k1
414-
; X64-KNL-NEXT: shrl $16, %ecx
415-
; X64-KNL-NEXT: movzbl %cl, %eax
416-
; X64-KNL-NEXT: kmovw %eax, %k2
417-
; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
418-
; X64-KNL-NEXT: kmovw %edx, %k2
407+
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k1
408+
; X64-KNL-NEXT: kmovw %edi, %k0
409+
; X64-KNL-NEXT: shrl $8, %edi
410+
; X64-KNL-NEXT: kmovw %edi, %k2
419411
; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
420412
; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
421413
; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
@@ -480,82 +472,56 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
480472
define <64 x i1> @i64_mask_extract_64(i64 %mask) {
481473
; X64-AVX512-LABEL: i64_mask_extract_64:
482474
; X64-AVX512: # %bb.0:
483-
; X64-AVX512-NEXT: movq %rdi, %rax
484-
; X64-AVX512-NEXT: kmovd %eax, %k0
485-
; X64-AVX512-NEXT: movzbl %ah, %ecx
486-
; X64-AVX512-NEXT: kmovd %ecx, %k1
487-
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
488-
; X64-AVX512-NEXT: movl %eax, %ecx
489-
; X64-AVX512-NEXT: shrl $24, %ecx
490-
; X64-AVX512-NEXT: kmovd %ecx, %k1
491-
; X64-AVX512-NEXT: movl %eax, %ecx
492-
; X64-AVX512-NEXT: shrl $16, %ecx
493-
; X64-AVX512-NEXT: movzbl %cl, %ecx
494-
; X64-AVX512-NEXT: kmovd %ecx, %k2
495-
; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
496-
; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
497-
; X64-AVX512-NEXT: movq %rdi, %rcx
498-
; X64-AVX512-NEXT: shrq $32, %rcx
499-
; X64-AVX512-NEXT: movzbl %cl, %ecx
500-
; X64-AVX512-NEXT: kmovd %ecx, %k1
501-
; X64-AVX512-NEXT: movq %rdi, %rcx
502-
; X64-AVX512-NEXT: shrq $40, %rcx
503-
; X64-AVX512-NEXT: movzbl %cl, %ecx
504-
; X64-AVX512-NEXT: kmovd %ecx, %k2
475+
; X64-AVX512-NEXT: kmovq %rdi, %k0
476+
; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
477+
; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
505478
; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
506-
; X64-AVX512-NEXT: movq %rdi, %rcx
507-
; X64-AVX512-NEXT: shrq $56, %rcx
508-
; X64-AVX512-NEXT: kmovd %ecx, %k2
509-
; X64-AVX512-NEXT: shrq $48, %rax
510-
; X64-AVX512-NEXT: movzbl %al, %eax
511-
; X64-AVX512-NEXT: kmovd %eax, %k3
512-
; X64-AVX512-NEXT: kunpckbw %k3, %k2, %k2
479+
; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
480+
; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
481+
; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
513482
; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
483+
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
484+
; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
485+
; X64-AVX512-NEXT: kshiftrd $16, %k0, %k3
486+
; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
487+
; X64-AVX512-NEXT: kunpckbw %k3, %k0, %k0
488+
; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
514489
; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
515490
; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
516491
; X64-AVX512-NEXT: retq
517492
;
518493
; X64-KNL-LABEL: i64_mask_extract_64:
519494
; X64-KNL: # %bb.0:
520-
; X64-KNL-NEXT: pushq %rbx
521-
; X64-KNL-NEXT: .cfi_def_cfa_offset 16
522-
; X64-KNL-NEXT: .cfi_offset %rbx, -16
523-
; X64-KNL-NEXT: movq %rsi, %rcx
524495
; X64-KNL-NEXT: movq %rdi, %rax
525-
; X64-KNL-NEXT: movl %ecx, %edx
526-
; X64-KNL-NEXT: movq %rsi, %rdi
527-
; X64-KNL-NEXT: movq %rsi, %r8
528-
; X64-KNL-NEXT: movq %rsi, %r9
529-
; X64-KNL-NEXT: kmovw %ecx, %k0
530-
; X64-KNL-NEXT: movzbl %ch, %ebx
531-
; X64-KNL-NEXT: # kill: def $ecx killed $ecx killed $rcx
532-
; X64-KNL-NEXT: shrl $24, %ecx
496+
; X64-KNL-NEXT: kmovw %esi, %k0
497+
; X64-KNL-NEXT: movl %esi, %ecx
498+
; X64-KNL-NEXT: shrl $8, %ecx
499+
; X64-KNL-NEXT: kmovw %ecx, %k1
500+
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
501+
; X64-KNL-NEXT: movl %esi, %ecx
502+
; X64-KNL-NEXT: shrl $16, %ecx
533503
; X64-KNL-NEXT: kmovw %ecx, %k1
534-
; X64-KNL-NEXT: shrl $16, %edx
535-
; X64-KNL-NEXT: movzbl %dl, %ecx
504+
; X64-KNL-NEXT: movl %esi, %ecx
505+
; X64-KNL-NEXT: shrl $24, %ecx
536506
; X64-KNL-NEXT: kmovw %ecx, %k2
537-
; X64-KNL-NEXT: shrq $32, %rsi
538-
; X64-KNL-NEXT: movzbl %sil, %ecx
507+
; X64-KNL-NEXT: kunpckbw %k1, %k2, %k1
508+
; X64-KNL-NEXT: movq %rsi, %rcx
509+
; X64-KNL-NEXT: shrq $32, %rcx
510+
; X64-KNL-NEXT: kmovw %ecx, %k2
511+
; X64-KNL-NEXT: movq %rsi, %rcx
512+
; X64-KNL-NEXT: shrq $40, %rcx
513+
; X64-KNL-NEXT: kmovw %ecx, %k3
514+
; X64-KNL-NEXT: kunpckbw %k2, %k3, %k2
515+
; X64-KNL-NEXT: movq %rsi, %rcx
516+
; X64-KNL-NEXT: shrq $48, %rcx
539517
; X64-KNL-NEXT: kmovw %ecx, %k3
540-
; X64-KNL-NEXT: shrq $40, %rdi
541-
; X64-KNL-NEXT: movzbl %dil, %ecx
542-
; X64-KNL-NEXT: kmovw %ecx, %k4
543-
; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
544-
; X64-KNL-NEXT: shrq $56, %r8
545-
; X64-KNL-NEXT: kmovw %r8d, %k2
518+
; X64-KNL-NEXT: shrq $56, %rsi
519+
; X64-KNL-NEXT: kmovw %esi, %k4
546520
; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
547-
; X64-KNL-NEXT: shrq $48, %r9
548-
; X64-KNL-NEXT: movzbl %r9b, %ecx
549-
; X64-KNL-NEXT: kmovw %ecx, %k4
550-
; X64-KNL-NEXT: kunpckbw %k4, %k2, %k2
551-
; X64-KNL-NEXT: kmovw %ebx, %k4
552-
; X64-KNL-NEXT: kunpckbw %k0, %k4, %k0
553-
; X64-KNL-NEXT: kmovw %k0, (%rax)
554-
; X64-KNL-NEXT: kmovw %k2, 6(%rax)
555-
; X64-KNL-NEXT: kmovw %k3, 4(%rax)
556-
; X64-KNL-NEXT: kmovw %k1, 2(%rax)
557-
; X64-KNL-NEXT: popq %rbx
558-
; X64-KNL-NEXT: .cfi_def_cfa_offset 8
521+
; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
522+
; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
523+
; X64-KNL-NEXT: kmovw %k1, 2(%rdi)
524+
; X64-KNL-NEXT: kmovw %k0, (%rdi)
559525
; X64-KNL-NEXT: retq
560526
%.splatinsert = insertelement <64 x i64> poison, i64 %mask, i64 0
561527
%.splat = shufflevector <64 x i64> %.splatinsert, <64 x i64> poison, <64 x i32> zeroinitializer

llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,6 @@ define half @test_v2f16(<2 x half> %a0) nounwind {
417417
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
418418
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
419419
; AVX512F-NEXT: seta %al
420-
; AVX512F-NEXT: negb %al
421420
; AVX512F-NEXT: kmovd %eax, %k1
422421
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
423422
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0

0 commit comments

Comments
 (0)