Skip to content

Commit b9e571a

Browse files
Rohit Aggarwal
Rohit Aggarwal
authored and
Rohit Aggarwal
committed
Restrict the scale fold so that it happens either fully or not at all. Added logic to identify sign bits.
1 parent 716943e commit b9e571a

File tree

2 files changed

+39
-43
lines changed

2 files changed

+39
-43
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56522,11 +56522,10 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
5652256522
if (DCI.isBeforeLegalize()) {
5652356523
// Attempt to move shifted index into the address scale, allows further
5652456524
// index truncation below.
56525-
// TODO
5652656525
if (Index.getOpcode() == ISD::SHL && isa<ConstantSDNode>(Scale)) {
5652756526
uint64_t ScaleAmt = Scale->getAsZExtVal();
5652856527
if (auto MinShAmt = DAG.getValidMinimumShiftAmount(Index)) {
56529-
if (*MinShAmt >= 1 && ScaleAmt < 8 &&
56528+
if (*MinShAmt >= 1 && (*MinShAmt + Log2_64(ScaleAmt)) < 4 &&
5653056529
DAG.ComputeNumSignBits(Index.getOperand(0)) > 1) {
5653156530
SDValue ShAmt = Index.getOperand(1);
5653256531
SDValue NewShAmt =
@@ -56546,7 +56545,16 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
5654656545
// Only do this before legalize types since v2i64 could become v2i32.
5654756546
// FIXME: We could check that the type is legal if we're after legalize
5654856547
// types, but then we would need to construct test cases where that happens.
56549-
if (IndexWidth > 32 && DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
56548+
unsigned ComputeNumSignBits = DAG.ComputeNumSignBits(Index);
56549+
if (Index.getOpcode() == ISD::SHL) {
56550+
if (auto MinShAmt = DAG.getValidMinimumShiftAmount(Index)) {
56551+
if (DAG.ComputeNumSignBits(Index.getOperand(0)) > 1) {
56552+
ComputeNumSignBits += *MinShAmt;
56553+
}
56554+
}
56555+
}
56556+
56557+
if (IndexWidth > 32 && ComputeNumSignBits > (IndexWidth - 32)) {
5655056558
EVT NewVT = IndexVT.changeVectorElementType(MVT::i32);
5655156559

5655256560
// FIXME: We could support more than just constant vectors, but we need to

llvm/test/CodeGen/X86/masked_gather_scatter.ll

Lines changed: 28 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4805,9 +4805,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48054805
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
48064806
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
48074807
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
4808-
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4809-
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4810-
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4808+
; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm0
4809+
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
48114810
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48124811
; X64-KNL-NEXT: retq
48134812
;
@@ -4818,9 +4817,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48184817
; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48194818
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48204819
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4821-
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4822-
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4823-
; X86-KNL-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
4820+
; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
4821+
; X86-KNL-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
48244822
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
48254823
; X86-KNL-NEXT: retl
48264824
;
@@ -4829,9 +4827,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48294827
; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
48304828
; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
48314829
; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4832-
; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4833-
; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4834-
; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0,8), %zmm1 {%k1}
4830+
; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm0
4831+
; X64-SKX-NEXT: vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
48354832
; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
48364833
; X64-SKX-NEXT: retq
48374834
;
@@ -4842,9 +4839,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16
48424839
; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
48434840
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
48444841
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4845-
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4846-
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4847-
; X86-SKX-NEXT: vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
4842+
; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
4843+
; X86-SKX-NEXT: vgatherdps (%eax,%zmm0), %zmm1 {%k1}
48484844
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
48494845
; X86-SKX-NEXT: retl
48504846
%wide.load = load <16 x i32>, ptr %arr, align 4
@@ -4861,9 +4857,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48614857
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
48624858
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
48634859
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
4864-
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4865-
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4866-
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4860+
; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm0
4861+
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm0), %zmm1 {%k1}
48674862
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
48684863
; X64-KNL-NEXT: retq
48694864
;
@@ -4874,9 +4869,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48744869
; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
48754870
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
48764871
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4877-
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4878-
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4879-
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
4872+
; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm0
4873+
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
48804874
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
48814875
; X86-KNL-NEXT: retl
48824876
;
@@ -4885,9 +4879,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48854879
; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
48864880
; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
48874881
; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4888-
; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4889-
; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4890-
; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0,8), %zmm1 {%k1}
4882+
; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm0
4883+
; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm0), %zmm1 {%k1}
48914884
; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
48924885
; X64-SKX-NEXT: retq
48934886
;
@@ -4898,9 +4891,8 @@ define <16 x float> @test_gather_structpt_16f32_mask_index_offset(ptr %x, ptr %a
48984891
; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
48994892
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
49004893
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4901-
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4902-
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm0
4903-
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
4894+
; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm0
4895+
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
49044896
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
49054897
; X86-SKX-NEXT: retl
49064898
%wide.load = load <16 x i32>, ptr %arr, align 4
@@ -4917,12 +4909,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49174909
; X64-KNL-NEXT: vpmovsxbd %xmm0, %zmm0
49184910
; X64-KNL-NEXT: vpslld $31, %zmm0, %zmm0
49194911
; X64-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
4920-
; X64-KNL-NEXT: vmovdqu64 (%rsi), %zmm0
4921-
; X64-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4912+
; X64-KNL-NEXT: vpslld $4, (%rsi), %zmm2
49224913
; X64-KNL-NEXT: kmovw %k1, %k2
49234914
; X64-KNL-NEXT: vmovaps %zmm1, %zmm0
4924-
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4925-
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
4915+
; X64-KNL-NEXT: vgatherdps (%rdi,%zmm2), %zmm0 {%k2}
4916+
; X64-KNL-NEXT: vgatherdps 4(%rdi,%zmm2), %zmm1 {%k1}
49264917
; X64-KNL-NEXT: retq
49274918
;
49284919
; X86-KNL-LABEL: test_gather_16f32_mask_index_pair:
@@ -4932,25 +4923,23 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49324923
; X86-KNL-NEXT: vptestmd %zmm0, %zmm0, %k1
49334924
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %eax
49344925
; X86-KNL-NEXT: movl {{[0-9]+}}(%esp), %ecx
4935-
; X86-KNL-NEXT: vmovdqu64 (%ecx), %zmm0
4936-
; X86-KNL-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4926+
; X86-KNL-NEXT: vpslld $4, (%ecx), %zmm2
49374927
; X86-KNL-NEXT: kmovw %k1, %k2
49384928
; X86-KNL-NEXT: vmovaps %zmm1, %zmm0
4939-
; X86-KNL-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
4940-
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
4929+
; X86-KNL-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
4930+
; X86-KNL-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
49414931
; X86-KNL-NEXT: retl
49424932
;
49434933
; X64-SKX-LABEL: test_gather_16f32_mask_index_pair:
49444934
; X64-SKX: # %bb.0:
49454935
; X64-SKX-NEXT: vpmovsxbd %xmm0, %zmm0
49464936
; X64-SKX-NEXT: vpslld $31, %zmm0, %zmm0
49474937
; X64-SKX-NEXT: vpmovd2m %zmm0, %k1
4948-
; X64-SKX-NEXT: vmovdqu64 (%rsi), %zmm0
4949-
; X64-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4938+
; X64-SKX-NEXT: vpslld $4, (%rsi), %zmm2
49504939
; X64-SKX-NEXT: kmovw %k1, %k2
49514940
; X64-SKX-NEXT: vmovaps %zmm1, %zmm0
4952-
; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2,8), %zmm0 {%k2}
4953-
; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2,8), %zmm1 {%k1}
4941+
; X64-SKX-NEXT: vgatherdps (%rdi,%zmm2), %zmm0 {%k2}
4942+
; X64-SKX-NEXT: vgatherdps 4(%rdi,%zmm2), %zmm1 {%k1}
49544943
; X64-SKX-NEXT: retq
49554944
;
49564945
; X86-SKX-LABEL: test_gather_16f32_mask_index_pair:
@@ -4960,12 +4949,11 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
49604949
; X86-SKX-NEXT: vpmovd2m %zmm0, %k1
49614950
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %eax
49624951
; X86-SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx
4963-
; X86-SKX-NEXT: vmovdqu64 (%ecx), %zmm0
4964-
; X86-SKX-NEXT: vpaddd %zmm0, %zmm0, %zmm2
4952+
; X86-SKX-NEXT: vpslld $4, (%ecx), %zmm2
49654953
; X86-SKX-NEXT: kmovw %k1, %k2
49664954
; X86-SKX-NEXT: vmovaps %zmm1, %zmm0
4967-
; X86-SKX-NEXT: vgatherdps (%eax,%zmm2,8), %zmm0 {%k2}
4968-
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2,8), %zmm1 {%k1}
4955+
; X86-SKX-NEXT: vgatherdps (%eax,%zmm2), %zmm0 {%k2}
4956+
; X86-SKX-NEXT: vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
49694957
; X86-SKX-NEXT: retl
49704958
%wide.load = load <16 x i32>, ptr %arr, align 4
49714959
%and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>

0 commit comments

Comments
 (0)