[X86][SelectionDAG] Handle the case for gather where index is SHL #139703

Merged: 11 commits, May 19, 2025
llvm/lib/Target/X86/X86ISelLowering.cpp: 2 additions, 1 deletion
@@ -56717,7 +56717,8 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
return SDValue(N, 0);
}
if (auto MinShAmt = DAG.getValidMinimumShiftAmount(Index)) {
-if (*MinShAmt >= 1 && (*MinShAmt + Log2ScaleAmt) < 4 &&
+if (*MinShAmt >= 1 &&
+    (((*MinShAmt + Log2ScaleAmt) < 4) || (1 + Log2ScaleAmt < 4)) &&
DAG.ComputeNumSignBits(Index.getOperand(0)) > 1) {
SDValue ShAmt = Index.getOperand(1);
SDValue NewShAmt =
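The guard above is the heart of the change: previously the combine folded a uniform `shl` on the index into the gather's scale only when the entire shift fit, since x86 addressing allows scales of 1/2/4/8, i.e. log2(scale) < 4. The new `(1 + Log2ScaleAmt < 4)` alternative lets it fold as many shift bits as fit into the scale and keep the remainder as a smaller shift on the index. Below is a minimal standalone sketch of that split; the helper name and the residual-shift computation are assumptions (the actual patch continues past the truncated `SDValue NewShAmt =` line above):

```cpp
#include <algorithm>
#include <cstdio>

// Sketch: split a uniform left-shift on a gather index between the
// instruction's scale field (log2(scale) <= 3 on x86) and a residual
// shift that stays on the index vector. Hypothetical helper, not the
// actual LLVM code.
struct ShiftSplit {
  unsigned IntoScale;   // shift bits absorbed by the scale
  unsigned ResidualShl; // shift amount left on the index
};

static ShiftSplit splitShiftIntoScale(unsigned MinShAmt, unsigned Log2ScaleAmt) {
  // Mirrors the guard in the diff: worthwhile if the whole shift fits
  // (MinShAmt + Log2ScaleAmt < 4) or at least one bit does
  // (1 + Log2ScaleAmt < 4).
  unsigned IntoScale = std::min(MinShAmt, 3u - Log2ScaleAmt);
  return {IntoScale, MinShAmt - IntoScale};
}

int main() {
  // The regression tests below: index << 4 with scale 1 (Log2ScaleAmt == 0).
  ShiftSplit S = splitShiftIntoScale(/*MinShAmt=*/4, /*Log2ScaleAmt=*/0);
  // Prints "into scale: 3, residual shl: 1": the scale becomes 8 and the
  // index is doubled, which is the vpaddd + scale-8 vgatherdps in the
  // updated CHECK lines.
  std::printf("into scale: %u, residual shl: %u\n", S.IntoScale, S.ResidualShl);
  return 0;
}
```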
llvm/test/CodeGen/X86/masked_gather_scatter.ll: 33 additions, 111 deletions
@@ -4808,16 +4808,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
-; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
-; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
-; X64-KNL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
-; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
; X64-KNL-NEXT: retq
;
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4839,16 +4832,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
-; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: retq
;
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4859,16 +4845,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
-; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: retq
;
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index:
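In the old output each 512-bit gather of 32-bit floats had to be split: the masked 32-bit indices were zero-extended to 64 bits (`vpmovzxdq`), shifted left by 4 in vector code, and fed to two qword-indexed `vgatherqps` instructions with scale 1. With the shift partially folded, the indices stay 32-bit, a single `vpaddd %zmm0, %zmm0, %zmm0` doubles them, and one dword-indexed `vgatherdps` with scale 8 supplies the remaining factor of 16. A quick sanity check of the address arithmetic, in plain C++ independent of any LLVM API (the 32-bit doubling cannot overflow because the combine requires `ComputeNumSignBits(Index) > 1`):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t base = 0x100000; // stand-in for %rdi
  // The test masks the indices to a small range (vpandd), which is what
  // guarantees more than one sign bit.
  for (uint32_t idx = 0; idx < (1u << 20); ++idx) {
    uint64_t oldAddr = base + ((uint64_t)idx << 4);     // zext, shl 4, scale 1
    uint64_t newAddr = base + (uint64_t)(idx * 2u) * 8; // vpaddd, scale 8
    assert(oldAddr == newAddr);
  }
  return 0;
}
```

The `_offset` variant in the next hunks is the same pattern with a constant displacement of 4 folded into the gather's address.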
@@ -4898,16 +4877,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
-; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
-; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
-; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
-; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
; X64-KNL-NEXT: retq
;
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4929,16 +4901,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
-; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-SMALL-NEXT: retq
;
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4949,16 +4914,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
-; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
-; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
-; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
-; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
; X64-SKX-LARGE-NEXT: retq
;
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4988,23 +4946,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm3
-; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
-; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
-; X64-KNL-NEXT: kmovw %k2, %k3
-; X64-KNL-NEXT: vmovaps %ymm4, %ymm0
-; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
-; X64-KNL-NEXT: vmovaps %ymm1, %ymm5
-; X64-KNL-NEXT: kmovw %k1, %k3
-; X64-KNL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
-; X64-KNL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
-; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
-; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
-; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-KNL-NEXT: kmovw %k1, %k2
+; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
+; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
+; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
; X64-KNL-NEXT: retq
;
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5028,23 +4974,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
-; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm3
-; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
-; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
-; X64-SKX-SMALL-NEXT: kmovw %k2, %k3
-; X64-SKX-SMALL-NEXT: vmovaps %ymm4, %ymm0
-; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
-; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm5
-; X64-SKX-SMALL-NEXT: kmovw %k1, %k3
-; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
-; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
-; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
-; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
-; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
+; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
+; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
; X64-SKX-SMALL-NEXT: retq
;
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5055,23 +4989,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
-; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
-; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
-; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm3
-; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
-; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm4
-; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
-; X64-SKX-LARGE-NEXT: vmovaps %ymm4, %ymm0
-; X64-SKX-LARGE-NEXT: kmovw %k2, %k3
-; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
-; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm5
-; X64-SKX-LARGE-NEXT: kmovw %k1, %k3
-; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
-; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
-; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
-; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
-; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
+; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
+; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
+; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
; X64-SKX-LARGE-NEXT: retq
;
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_pair:
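The `_pair` variant gathers both fields of a 16-byte element, and the win compounds: the doubled index vector (`%zmm2`) is computed once and reused by both scale-8 gathers at displacements 0 and 4, so four `vgatherqps` plus two `vinsertf64x4` collapse into two `vgatherdps`. The `kmovw %k1, %k2` copy is still needed because an AVX-512 gather consumes its mask register, clearing it as elements complete. For reference, a scalar model of what the pair test computes (illustrative only; the struct layout and field names are assumptions matching the 16-byte stride):

```cpp
#include <cstdint>

// 16-byte struct matching the test's stride: two floats plus padding.
struct Elem {
  float a;      // loaded by the displacement-0 gather
  float b;      // loaded by the displacement-4 gather
  char pad[8];
};
static_assert(sizeof(Elem) == 16, "stride assumed by the vpaddd + scale-8 addressing");

void gatherPair(const Elem *base, const uint32_t idx[16], const bool mask[16],
                float outA[16], float outB[16]) {
  for (int lane = 0; lane < 16; ++lane) {
    if (!mask[lane])
      continue; // masked-off lanes keep their previous contents
    // Per-lane address math the gathers perform: base + (idx*2)*8,
    // plus 4 for the second field.
    outA[lane] = base[idx[lane]].a;
    outB[lane] = base[idx[lane]].b;
  }
}
```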