@@ -4805,9 +4805,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
 ; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm0
+; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
 ; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
 ; X64-KNL-NEXT: retq
 ;
@@ -4818,9 +4817,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
 ; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
-; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X86-KNL-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
+; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
+; X86-KNL-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
 ; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
 ; X86-KNL-NEXT: retl
 ;
@@ -4829,9 +4827,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
 ; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
 ; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm0
+; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
 ; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
 ; X64-SKX-NEXT: retq
 ;
@@ -4842,9 +4839,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
 ; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
 ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
-; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X86-SKX-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
+; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
+; X86-SKX-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
 ; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
 ; X86-SKX-NEXT: retl
 %wide.load = load <16 x i32>, ptr %arr, align 4
@@ -4861,9 +4857,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
 ; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm0
+; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0), %zmm1 {%k1}
 ; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
 ; X64-KNL-NEXT: retq
 ;
@@ -4874,9 +4869,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
 ; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
-; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
+; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
+; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
 ; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
 ; X86-KNL-NEXT: retl
 ;
@@ -4885,9 +4879,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
 ; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
 ; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
+; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm0
+; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0), %zmm1 {%k1}
 ; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
 ; X64-SKX-NEXT: retq
 ;
@@ -4898,9 +4891,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
 ; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
 ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
-; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
-; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
+; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
+; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
 ; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
 ; X86-SKX-NEXT: retl
 %wide.load = load <16 x i32>, ptr %arr, align 4
@@ -4917,12 +4909,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
 ; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
 ; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
 ; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
-; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm2
 ; X64-KNL-NEXT: kmovw %k1, %k2
 ; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
-; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
-; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
+; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2), %zmm0 {%k2}
+; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2), %zmm1 {%k1}
 ; X64-KNL-NEXT: retq
 ;
 ; X86-KNL-LABEL: test_gather_16f32_mask_index_pair:
@@ -4932,25 +4923,23 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
 ; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
 ; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
-; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm2
 ; X86-KNL-NEXT: kmovw %k1, %k2
 ; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
-; X86-KNL-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
-; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
+; X86-KNL-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
+; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
 ; X86-KNL-NEXT: retl
 ;
 ; X64-SKX-LABEL: test_gather_16f32_mask_index_pair:
 ; X64-SKX: # %bb.0:
 ; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
 ; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
 ; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
-; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
-; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm2
 ; X64-SKX-NEXT: kmovw %k1, %k2
 ; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
-; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
-; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
+; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2), %zmm0 {%k2}
+; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2), %zmm1 {%k1}
 ; X64-SKX-NEXT: retq
 ;
 ; X86-SKX-LABEL: test_gather_16f32_mask_index_pair:
@@ -4960,12 +4949,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
 ; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
 ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
-; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
+; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm2
 ; X86-SKX-NEXT: kmovw %k1, %k2
 ; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
-; X86-SKX-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
-; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
+; X86-SKX-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
+; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
 ; X86-SKX-NEXT: retl
 %wide.load = load <16 x i32>, ptr %arr, align 4
 %and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
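
Every hunk above makes the same substitution: the old sequence loaded the index vector with vmovdqu64, doubled it with vpaddd, and gathered with a scale of 8 (an effective stride of index*16), while the new sequence shifts the indices left by 4 directly from memory with a load-folded vpslld and gathers with a scale of 1. The sketch below shows, under assumed names and an assumed 16-byte %struct.pt layout, the kind of IR these CHECK lines exercise; it is an illustration only, since the full test body sits outside these hunks.

; Sketch only: %struct.pt and @sketch_gather_structpt are assumptions made
; for illustration. A 16-byte element means each i32 index advances 16 bytes,
; i.e. index*2 at gather scale 8 before this change, index<<4 at scale 1 after.
%struct.pt = type { float, float, float, float }

declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)

define <16 x float> @sketch_gather_structpt(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
  %wide.load = load <16 x i32>, ptr %arr, align 4
  ; 536870911 = 0x1FFFFFFF bounds every index; the combiner presumably relies
  ; on that bound when it folds the element scaling into the index register.
  %and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
  %zext = zext <16 x i32> %and to <16 x i64>
  ; Vector GEP over the assumed 16-byte struct: address = %x + index*16.
  %ptrs = getelementptr inbounds %struct.pt, ptr %x, <16 x i64> %zext
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
  ret <16 x float> %res
}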