Skip to content

Commit 716943e

Browse files
Rohit AggarwalRohit Aggarwal
Rohit Aggarwal
authored and
Rohit Aggarwal
committed
Fold operation
1 parent 47bf70b commit 716943e

File tree

2 files changed

+40
-85
lines changed

2 files changed

+40
-85
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56580,6 +56580,14 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
5658056580

5658156581
EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
5658256582

56583+
if (Index.getOpcode() == ISD::SHL) {
56584+
unsigned BitWidth = Index.getScalarValueSizeInBits();
56585+
unsigned MaskBits = BitWidth - Log2_32(Scale->getAsZExtVal());
56586+
APInt DemandedBits = APInt::getLowBitsSet(BitWidth, MaskBits);
56587+
if (TLI.SimplifyDemandedBits(Index, DemandedBits, DCI)) {
56588+
return SDValue(N, 0);
56589+
}
56590+
}
5658356591
// Try to move splat adders from the index operand to the base
5658456592
// pointer operand. Taking care to multiply by the scale. We can only do
5658556593
// this when index element type is the same as the pointer type.

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 32 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -4806,7 +4806,6 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48064806
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
48074807
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48084808
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4809-
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
48104809
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
48114810
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
48124811
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
@@ -4820,36 +4819,21 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48204819
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48214820
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
48224821
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4823-
; X86-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
48244822
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
48254823
; X86-KNL-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
48264824
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
48274825
; X86-KNL-NEXT: retl
48284826
;
4829-
; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index:
4830-
; X64-SKX-SMALL: # %bb.0:
4831-
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
4832-
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
4833-
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
4834-
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
4835-
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4836-
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4837-
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4838-
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
4839-
; X64-SKX-SMALL-NEXT: retq
4840-
;
4841-
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index:
4842-
; X64-SKX-LARGE: # %bb.0:
4843-
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
4844-
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
4845-
; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
4846-
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
4847-
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
4848-
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4849-
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4850-
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4851-
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
4852-
; X64-SKX-LARGE-NEXT: retq
4827+
; X64-SKX-LABEL: test_gather_structpt_16f32_mask_index:
4828+
; X64-SKX: # %bb.0:
4829+
; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
4830+
; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
4831+
; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4832+
; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4833+
; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4834+
; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4835+
; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
4836+
; X64-SKX-NEXT: retq
48534837
;
48544838
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index:
48554839
; X86-SKX: # %bb.0:
@@ -4859,7 +4843,6 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48594843
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
48604844
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
48614845
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4862-
; X86-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
48634846
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
48644847
; X86-SKX-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
48654848
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
@@ -4879,7 +4862,6 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48794862
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
48804863
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48814864
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4882-
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
48834865
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
48844866
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
48854867
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
@@ -4893,36 +4875,21 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48934875
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48944876
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
48954877
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4896-
; X86-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
48974878
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
48984879
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
48994880
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
49004881
; X86-KNL-NEXT: retl
49014882
;
4902-
; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index_offset:
4903-
; X64-SKX-SMALL: # %bb.0:
4904-
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
4905-
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
4906-
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
4907-
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
4908-
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4909-
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4910-
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4911-
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
4912-
; X64-SKX-SMALL-NEXT: retq
4913-
;
4914-
; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index_offset:
4915-
; X64-SKX-LARGE: # %bb.0:
4916-
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
4917-
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
4918-
; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
4919-
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
4920-
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
4921-
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
4922-
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4923-
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4924-
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
4925-
; X64-SKX-LARGE-NEXT: retq
4883+
; X64-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
4884+
; X64-SKX: # %bb.0:
4885+
; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
4886+
; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
4887+
; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4888+
; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4889+
; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4890+
; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4891+
; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
4892+
; X64-SKX-NEXT: retq
49264893
;
49274894
; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index_offset:
49284895
; X86-SKX: # %bb.0:
@@ -4932,7 +4899,6 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
49324899
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
49334900
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
49344901
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4935-
; X86-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
49364902
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
49374903
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
49384904
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
@@ -4952,7 +4918,6 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49524918
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
49534919
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49544920
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4955-
; X64-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
49564921
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
49574922
; X64-KNL-NEXT: kmovw %k1, %k2
49584923
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
@@ -4968,42 +4933,25 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49684933
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
49694934
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
49704935
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4971-
; X86-KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
49724936
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
49734937
; X86-KNL-NEXT: kmovw %k1, %k2
49744938
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
49754939
; X86-KNL-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
49764940
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
49774941
; X86-KNL-NEXT: retl
49784942
;
4979-
; X64-SKX-SMALL-LABEL: test_gather_16f32_mask_index_pair:
4980-
; X64-SKX-SMALL: # %bb.0:
4981-
; X64-SKX-SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
4982-
; X64-SKX-SMALL-NEXT: vpslld $31, %zmm0, %zmm0
4983-
; X64-SKX-SMALL-NEXT: vpmovd2m %zmm0, %k1
4984-
; X64-SKX-SMALL-NEXT: vmovdqu64 (%rsi), %zmm0
4985-
; X64-SKX-SMALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
4986-
; X64-SKX-SMALL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4987-
; X64-SKX-SMALL-NEXT: kmovw %k1, %k2
4988-
; X64-SKX-SMALL-NEXT: vmovaps %zmm1, %zmm0
4989-
; X64-SKX-SMALL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4990-
; X64-SKX-SMALL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
4991-
; X64-SKX-SMALL-NEXT: retq
4992-
;
4993-
; X64-SKX-LARGE-LABEL: test_gather_16f32_mask_index_pair:
4994-
; X64-SKX-LARGE: # %bb.0:
4995-
; X64-SKX-LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
4996-
; X64-SKX-LARGE-NEXT: vpslld $31, %zmm0, %zmm0
4997-
; X64-SKX-LARGE-NEXT: vpmovd2m %zmm0, %k1
4998-
; X64-SKX-LARGE-NEXT: vmovdqu64 (%rsi), %zmm0
4999-
; X64-SKX-LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
5000-
; X64-SKX-LARGE-NEXT: vpandd (%rax){1to16}, %zmm0, %zmm0
5001-
; X64-SKX-LARGE-NEXT: vpaddd %zmm0, %zmm0, %zmm2
5002-
; X64-SKX-LARGE-NEXT: kmovw %k1, %k2
5003-
; X64-SKX-LARGE-NEXT: vmovaps %zmm1, %zmm0
5004-
; X64-SKX-LARGE-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
5005-
; X64-SKX-LARGE-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
5006-
; X64-SKX-LARGE-NEXT: retq
4943+
; X64-SKX-LABEL: test_gather_16f32_mask_index_pair:
4944+
; X64-SKX: # %bb.0:
4945+
; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
4946+
; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
4947+
; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4948+
; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4949+
; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4950+
; X64-SKX-NEXT: kmovw %k1, %k2
4951+
; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
4952+
; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4953+
; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
4954+
; X64-SKX-NEXT: retq
50074955
;
50084956
; X86-SKX-LABEL: test_gather_16f32_mask_index_pair:
50094957
; X86-SKX: # %bb.0:
@@ -5013,7 +4961,6 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
50134961
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
50144962
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
50154963
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
5016-
; X86-SKX-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
50174964
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
50184965
; X86-SKX-NEXT: kmovw %k1, %k2
50194966
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0

0 commit comments

Comments
 (0)