Skip to content

Commit 35a9631

Browse files
rohitaggarwal007, Rohit Aggarwal, RKSimon
authored
[X86][SelectionDAG] Handle the case for gather where index is SHL (#139703)
Fix the gather index handling for the SHL opcode when the shift amount is 4 or greater. This is a follow-up to #137813.
---------
Co-authored-by: Rohit Aggarwal <[email protected]>
Co-authored-by: Simon Pilgrim <[email protected]>
1 parent 21b4059 commit 35a9631

File tree

2 files changed

+34
-112
lines changed

2 files changed

+34
-112
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56725,7 +56725,7 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
5672556725
return SDValue(N, 0);
5672656726
}
5672756727
if (auto MinShAmt = DAG.getValidMinimumShiftAmount(Index)) {
56728-
if (*MinShAmt >= 1 && (*MinShAmt + Log2ScaleAmt) < 4 &&
56728+
if (*MinShAmt >= 1 && Log2ScaleAmt < 3 &&
5672956729
DAG.ComputeNumSignBits(Index.getOperand(0)) > 1) {
5673056730
SDValue ShAmt = Index.getOperand(1);
5673156731
SDValue NewShAmt =

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 33 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -4808,16 +4808,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48084808
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48094809
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
48104810
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4811-
; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4812-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4813-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4814-
; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
4815-
; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4816-
; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4817-
; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4818-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4819-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4820-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4811+
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4812+
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4813+
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48214814
; X64-KNL-NEXT: retq
48224815
;
48234816
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4839,16 +4832,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48394832
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
48404833
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
48414834
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4842-
; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4843-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4844-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4845-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
4846-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
4847-
; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4848-
; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
4849-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4850-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4851-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4835+
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4836+
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4837+
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
48524838
; X64-SKX-SMALL-NEXT: retq
48534839
;
48544840
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4859,16 +4845,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48594845
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
48604846
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
48614847
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4862-
; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4863-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4864-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4865-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
4866-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
4867-
; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4868-
; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
4869-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm3 {%k2}
4870-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
4871-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4848+
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4849+
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4850+
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
48724851
; X64-SKX-LARGE-NEXT: retq
48734852
;
48744853
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index:
@@ -4898,16 +4877,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48984877
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48994878
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
49004879
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4901-
; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4902-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4903-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4904-
; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm0
4905-
; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4906-
; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4907-
; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4908-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4909-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4910-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4880+
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4881+
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4882+
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
49114883
; X64-KNL-NEXT: retq
49124884
;
49134885
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4929,16 +4901,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49294901
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
49304902
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
49314903
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4932-
; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4933-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4934-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4935-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm0
4936-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
4937-
; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4938-
; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
4939-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4940-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4941-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4904+
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4905+
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4906+
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
49424907
; X64-SKX-SMALL-NEXT: retq
49434908
;
49444909
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4949,16 +4914,9 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49494914
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
49504915
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
49514916
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4952-
; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4953-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4954-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4955-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm0
4956-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
4957-
; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm3
4958-
; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
4959-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm3 {%k2}
4960-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm0), %ymm1 {%k1}
4961-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
4917+
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4918+
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4919+
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
49624920
; X64-SKX-LARGE-NEXT: retq
49634921
;
49644922
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
@@ -4988,23 +4946,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
49884946
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49894947
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
49904948
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4991-
; X64-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
4992-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
4993-
; X64-KNL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
4994-
; X64-KNL-NEXT: vpsllq $4, %zmm0, %zmm3
4995-
; X64-KNL-NEXT: vpsllq $4, %zmm2, %zmm2
4996-
; X64-KNL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
4997-
; X64-KNL-NEXT: kshiftrw $8, %k1, %k2
4998-
; X64-KNL-NEXT: kmovw %k2, %k3
4999-
; X64-KNL-NEXT: vmovaps %ymm4, %ymm0
5000-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5001-
; X64-KNL-NEXT: vmovaps %ymm1, %ymm5
5002-
; X64-KNL-NEXT: kmovw %k1, %k3
5003-
; X64-KNL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5004-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5005-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5006-
; X64-KNL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5007-
; X64-KNL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4949+
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4950+
; X64-KNL-NEXT: kmovw %k1, %k2
4951+
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
4952+
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4953+
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50084954
; X64-KNL-NEXT: retq
50094955
;
50104956
; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5028,23 +4974,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
50284974
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
50294975
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
50304976
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
5031-
; X64-SKX-SMALL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5032-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5033-
; X64-SKX-SMALL-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5034-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm0, %zmm3
5035-
; X64-SKX-SMALL-NEXT: vpsllq $4, %zmm2, %zmm2
5036-
; X64-SKX-SMALL-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5037-
; X64-SKX-SMALL-NEXT: kshiftrw $8, %k1, %k2
5038-
; X64-SKX-SMALL-NEXT: kmovw %k2, %k3
5039-
; X64-SKX-SMALL-NEXT: vmovaps %ymm4, %ymm0
5040-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5041-
; X64-SKX-SMALL-NEXT: vmovaps %ymm1, %ymm5
5042-
; X64-SKX-SMALL-NEXT: kmovw %k1, %k3
5043-
; X64-SKX-SMALL-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5044-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5045-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5046-
; X64-SKX-SMALL-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5047-
; X64-SKX-SMALL-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4977+
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4978+
; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
4979+
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
4980+
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4981+
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50484982
; X64-SKX-SMALL-NEXT: retq
50494983
;
50504984
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_pair:
@@ -5055,23 +4989,11 @@ define {<16 x float>, <16 x float>} @test_gather_structpt_16f32_mask_index_pair(
50554989
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
50564990
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
50574991
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
5058-
; X64-SKX-LARGE-NEXT: vextracti64x4 $1, %zmm0, %ymm2
5059-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero
5060-
; X64-SKX-LARGE-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
5061-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm0, %zmm3
5062-
; X64-SKX-LARGE-NEXT: vpsllq $4, %zmm2, %zmm2
5063-
; X64-SKX-LARGE-NEXT: vextractf64x4 $1, %zmm1, %ymm4
5064-
; X64-SKX-LARGE-NEXT: kshiftrw $8, %k1, %k2
5065-
; X64-SKX-LARGE-NEXT: vmovaps %ymm4, %ymm0
5066-
; X64-SKX-LARGE-NEXT: kmovw %k2, %k3
5067-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm2), %ymm0 {%k3}
5068-
; X64-SKX-LARGE-NEXT: vmovaps %ymm1, %ymm5
5069-
; X64-SKX-LARGE-NEXT: kmovw %k1, %k3
5070-
; X64-SKX-LARGE-NEXT: vgatherqps (%rdi,%zmm3), %ymm5 {%k3}
5071-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm0, %zmm5, %zmm0
5072-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm2), %ymm4 {%k2}
5073-
; X64-SKX-LARGE-NEXT: vgatherqps 4(%rdi,%zmm3), %ymm1 {%k1}
5074-
; X64-SKX-LARGE-NEXT: vinsertf64x4 $1, %ymm4, %zmm1, %zmm1
4992+
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4993+
; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
4994+
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
4995+
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4996+
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
50754997
; X64-SKX-LARGE-NEXT: retq
50764998
;
50774999
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_pair:

0 commit comments

Comments
 (0)