[X86] Add new #134979 test cases for gather scalar #137416

Merged: 3 commits merged into llvm:main from MaskedtestCase on Apr 29, 2025

Conversation

rohitaggarwal007 (Contributor) commented Apr 25, 2025

We are adding new test cases to show the impact of the transformation in #134979.

These are similar to the tests added previously in commit 688c3ffb057a87b86c6c1e77040418adf511efbb.

They handle a struct with two members and different vector factors; the sketch below shows the core pattern.
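
For context, the pattern under test is a masked gather of a single struct member at a fixed stride. Below is a minimal standalone sketch in LLVM IR; the function name is illustrative and not part of the patch, and the real tests in the diff additionally mask and zero-extend indices loaded from memory.

; %struct.pt2 is 8 bytes, so member 1 sits at offset 4 with an 8-byte stride.
%struct.pt2 = type { float, float }

declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)

; Gather the second member of %struct.pt2 for eight indices at once.
define <8 x float> @gather_pt2_member1(ptr %x, <8 x i64> %idx, <8 x i1> %mask, <8 x float> %src0) {
  ; One pointer per lane: &x[idx[i]].1
  %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %idx, i32 1
  ; Disabled lanes return the corresponding element of %src0.
  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
  ret <8 x float> %res
}

In the generated code below, the struct stride appears as the scale of the gather addressing mode, e.g. vgatherqps 4(%rdi,%zmm0,8), rather than as separately materialized per-lane pointers.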

llvmbot (Member) commented Apr 25, 2025

@llvm/pr-subscribers-backend-x86

Author: Rohit Aggarwal (rohitaggarwal007)

Changes

Patch is 29.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137416.diff

1 file affected:

  • (modified) llvm/test/CodeGen/X86/masked_gather_scatter.ll (+512-1)
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 46e589b7b1be9..33182b8faafd1 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -4812,6 +4812,7 @@ declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x
 
 ; Test gathers from struct
 %struct.pt = type { float, float, float, i32 }
+%struct.pt2 = type { float, float }
 
 define <16 x float> @test_gather_structpt_16f32_mask_index(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
 ; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index:
@@ -5107,7 +5108,517 @@ define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair(ptr %x, p
   %res1 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs1, i32 4, <16 x i1> %mask, <16 x float> %src0)
   %ptrs = getelementptr inbounds %struct.pt, ptr %x, <16 x i64> %zext, i32 1
   %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
-  %pair1 = insertvalue {<16 x float>, <16 x float>} undef, <16 x float> %res1, 0
+  %pair1 = insertvalue {<16 x float>, <16 x float>} poison, <16 x float> %res1, 0
+  %pair2 = insertvalue {<16 x float>, <16 x float>} %pair1, <16 x float> %res, 1
+  ret {<16 x float>, <16 x float>} %pair2
+}
+
+define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-KNL:       # %bb.0:
+; X64-KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; X64-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; X64-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; X64-KNL-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [536870911,536870911,536870911,536870911,536870911,536870911,536870911,536870911]
+; X64-KNL-NEXT:    vpand (%rsi), %ymm0, %ymm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-KNL-NEXT:    vmovaps %ymm1, %ymm0
+; X64-KNL-NEXT:    retq
+;
+; X86-KNL-LABEL: test_gather_structpt_8f32_mask_index:
+; X86-KNL:       # %bb.0:
+; X86-KNL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; X86-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; X86-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT:    vmovdqu (%ecx), %ymm0
+; X86-KNL-NEXT:    vpslld $3, %ymm0, %ymm0
+; X86-KNL-NEXT:    vgatherdps (%eax,%zmm0), %zmm1 {%k1}
+; X86-KNL-NEXT:    vmovaps %ymm1, %ymm0
+; X86-KNL-NEXT:    retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-SKX-SMALL:       # %bb.0:
+; X64-SKX-SMALL-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpslld $31, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovd2m %ymm0, %k1
+; X64-SKX-SMALL-NEXT:    vmovdqu (%rsi), %ymm0
+; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT:    vmovaps %ymm1, %ymm0
+; X64-SKX-SMALL-NEXT:    retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_8f32_mask_index:
+; X64-SKX-LARGE:       # %bb.0:
+; X64-SKX-LARGE-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpslld $31, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovd2m %ymm0, %k1
+; X64-SKX-LARGE-NEXT:    vmovdqu (%rsi), %ymm0
+; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to8}, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT:    vmovaps %ymm1, %ymm0
+; X64-SKX-LARGE-NEXT:    retq
+;
+; X86-SKX-LABEL: test_gather_structpt_8f32_mask_index:
+; X86-SKX:       # %bb.0:
+; X86-SKX-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X86-SKX-NEXT:    vpslld $31, %ymm0, %ymm0
+; X86-SKX-NEXT:    vpmovd2m %ymm0, %k1
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT:    vmovups (%ecx), %ymm0
+; X86-SKX-NEXT:    vgatherdps (%eax,%ymm0,8), %ymm1 {%k1}
+; X86-SKX-NEXT:    vmovaps %ymm1, %ymm0
+; X86-SKX-NEXT:    retl
+  %wide.load = load <8 x i32>, ptr %arr, align 4
+  %and = and <8 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+  %zext = zext <8 x i32> %and to <8 x i64>
+  %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %zext
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
+  ret <8 x float> %res
+}
+
+define <8 x float> @test_gather_structpt_8f32_mask_index_offset(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-KNL:       # %bb.0:
+; X64-KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; X64-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; X64-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; X64-KNL-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [536870911,536870911,536870911,536870911,536870911,536870911,536870911,536870911]
+; X64-KNL-NEXT:    vpand (%rsi), %ymm0, %ymm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-KNL-NEXT:    vmovaps %ymm1, %ymm0
+; X64-KNL-NEXT:    retq
+;
+; X86-KNL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X86-KNL:       # %bb.0:
+; X86-KNL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; X86-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; X86-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT:    vmovdqu (%ecx), %ymm0
+; X86-KNL-NEXT:    vpslld $3, %ymm0, %ymm0
+; X86-KNL-NEXT:    vgatherdps 4(%eax,%zmm0), %zmm1 {%k1}
+; X86-KNL-NEXT:    vmovaps %ymm1, %ymm0
+; X86-KNL-NEXT:    retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-SKX-SMALL:       # %bb.0:
+; X64-SKX-SMALL-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpslld $31, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovd2m %ymm0, %k1
+; X64-SKX-SMALL-NEXT:    vmovdqu (%rsi), %ymm0
+; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT:    vgatherqps 4(%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT:    vmovaps %ymm1, %ymm0
+; X64-SKX-SMALL-NEXT:    retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X64-SKX-LARGE:       # %bb.0:
+; X64-SKX-LARGE-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpslld $31, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovd2m %ymm0, %k1
+; X64-SKX-LARGE-NEXT:    vmovdqu (%rsi), %ymm0
+; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to8}, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT:    vgatherqps 4(%rdi,%zmm0,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT:    vmovaps %ymm1, %ymm0
+; X64-SKX-LARGE-NEXT:    retq
+;
+; X86-SKX-LABEL: test_gather_structpt_8f32_mask_index_offset:
+; X86-SKX:       # %bb.0:
+; X86-SKX-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X86-SKX-NEXT:    vpslld $31, %ymm0, %ymm0
+; X86-SKX-NEXT:    vpmovd2m %ymm0, %k1
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT:    vmovups (%ecx), %ymm0
+; X86-SKX-NEXT:    vgatherdps 4(%eax,%ymm0,8), %ymm1 {%k1}
+; X86-SKX-NEXT:    vmovaps %ymm1, %ymm0
+; X86-SKX-NEXT:    retl
+  %wide.load = load <8 x i32>, ptr %arr, align 4
+  %and = and <8 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+  %zext = zext <8 x i32> %and to <8 x i64>
+  %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %zext, i32 1
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
+  ret <8 x float> %res
+}
+
+define {<8 x float>, <8 x float>} @test_gather_8f32_mask_index_pair(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
+; X64-KNL-LABEL: test_gather_8f32_mask_index_pair:
+; X64-KNL:       # %bb.0:
+; X64-KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; X64-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; X64-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; X64-KNL-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [536870911,536870911,536870911,536870911,536870911,536870911,536870911,536870911]
+; X64-KNL-NEXT:    vpand (%rsi), %ymm0, %ymm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    kmovw %k1, %k2
+; X64-KNL-NEXT:    vmovaps %ymm1, %ymm0
+; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm2,8), %ymm0 {%k2}
+; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT:    retq
+;
+; X86-KNL-LABEL: test_gather_8f32_mask_index_pair:
+; X86-KNL:       # %bb.0:
+; X86-KNL-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; X86-KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; X86-KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; X86-KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT:    vmovdqu (%ecx), %ymm0
+; X86-KNL-NEXT:    vpslld $3, %ymm0, %ymm2
+; X86-KNL-NEXT:    vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT:    kmovw %k1, %k2
+; X86-KNL-NEXT:    vgatherdps (%eax,%zmm2), %zmm0 {%k2}
+; X86-KNL-NEXT:    vgatherdps 4(%eax,%zmm2), %zmm1 {%k1}
+; X86-KNL-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
+; X86-KNL-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
+; X86-KNL-NEXT:    retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_8f32_mask_index_pair:
+; X64-SKX-SMALL:       # %bb.0:
+; X64-SKX-SMALL-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpslld $31, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovd2m %ymm0, %k1
+; X64-SKX-SMALL-NEXT:    vmovdqu (%rsi), %ymm0
+; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT:    kmovw %k1, %k2
+; X64-SKX-SMALL-NEXT:    vmovaps %ymm1, %ymm0
+; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm2,8), %ymm0 {%k2}
+; X64-SKX-SMALL-NEXT:    vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT:    retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_8f32_mask_index_pair:
+; X64-SKX-LARGE:       # %bb.0:
+; X64-SKX-LARGE-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpslld $31, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovd2m %ymm0, %k1
+; X64-SKX-LARGE-NEXT:    vmovdqu (%rsi), %ymm0
+; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to8}, %ymm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT:    kmovw %k1, %k2
+; X64-SKX-LARGE-NEXT:    vmovaps %ymm1, %ymm0
+; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm2,8), %ymm0 {%k2}
+; X64-SKX-LARGE-NEXT:    vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT:    retq
+;
+; X86-SKX-LABEL: test_gather_8f32_mask_index_pair:
+; X86-SKX:       # %bb.0:
+; X86-SKX-NEXT:    vpmovsxwd %xmm0, %ymm0
+; X86-SKX-NEXT:    vpslld $31, %ymm0, %ymm0
+; X86-SKX-NEXT:    vpmovd2m %ymm0, %k1
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT:    vmovups (%ecx), %ymm2
+; X86-SKX-NEXT:    kmovw %k1, %k2
+; X86-SKX-NEXT:    vmovaps %ymm1, %ymm0
+; X86-SKX-NEXT:    vgatherdps (%eax,%ymm2,8), %ymm0 {%k2}
+; X86-SKX-NEXT:    vgatherdps 4(%eax,%ymm2,8), %ymm1 {%k1}
+; X86-SKX-NEXT:    retl
+  %wide.load = load <8 x i32>, ptr %arr, align 4
+  %and = and <8 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+  %zext = zext <8 x i32> %and to <8 x i64>
+  %ptrs1 = getelementptr inbounds %struct.pt2, ptr %x , <8 x i64> %zext
+  %res1 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs1, i32 4, <8 x i1> %mask, <8 x float> %src0)
+  %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <8 x i64> %zext, i32 1
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %src0)
+  %pair1 = insertvalue {<8 x float>, <8 x float>} poison, <8 x float> %res1, 0
+  %pair2 = insertvalue {<8 x float>, <8 x float>} %pair1, <8 x float> %res, 1
+  ret {<8 x float>, <8 x float>} %pair2
+}
+
+define <16 x float> @test_gather_structpt_16f32_mask_index1(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-KNL:       # %bb.0:
+; X64-KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X64-KNL-NEXT:    vpslld $31, %zmm0, %zmm0
+; X64-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; X64-KNL-NEXT:    vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; X64-KNL-NEXT:    kshiftrw $8, %k1, %k2
+; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-KNL-NEXT:    vgatherqps (%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT:    retq
+;
+; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X86-KNL:       # %bb.0:
+; X86-KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X86-KNL-NEXT:    vpslld $31, %zmm0, %zmm0
+; X86-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT:    vmovups (%ecx), %zmm0
+; X86-KNL-NEXT:    vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
+; X86-KNL-NEXT:    vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT:    retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-SKX-SMALL:       # %bb.0:
+; X64-SKX-SMALL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vpslld $31, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vpmovd2m %zmm0, %k1
+; X64-SKX-SMALL-NEXT:    vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-SMALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; X64-SKX-SMALL-NEXT:    kshiftrw $8, %k1, %k2
+; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-SKX-SMALL-NEXT:    vgatherqps (%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-SMALL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-SMALL-NEXT:    retq
+;
+; X64-SKX-LARGE-LABEL: test_gather_structpt_16f32_mask_index1:
+; X64-SKX-LARGE:       # %bb.0:
+; X64-SKX-LARGE-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X64-SKX-LARGE-NEXT:    vpslld $31, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT:    vpmovd2m %zmm0, %k1
+; X64-SKX-LARGE-NEXT:    vmovdqu64 (%rsi), %zmm0
+; X64-SKX-LARGE-NEXT:    movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax
+; X64-SKX-LARGE-NEXT:    vpandd (%rax){1to16}, %zmm0, %zmm0
+; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; X64-SKX-LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-SKX-LARGE-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; X64-SKX-LARGE-NEXT:    kshiftrw $8, %k1, %k2
+; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-SKX-LARGE-NEXT:    vgatherqps (%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-SKX-LARGE-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-SKX-LARGE-NEXT:    retq
+;
+; X86-SKX-LABEL: test_gather_structpt_16f32_mask_index1:
+; X86-SKX:       # %bb.0:
+; X86-SKX-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X86-SKX-NEXT:    vpslld $31, %zmm0, %zmm0
+; X86-SKX-NEXT:    vpmovd2m %zmm0, %k1
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SKX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SKX-NEXT:    vmovups (%ecx), %zmm0
+; X86-SKX-NEXT:    vgatherdps (%eax,%zmm0,8), %zmm1 {%k1}
+; X86-SKX-NEXT:    vmovaps %zmm1, %zmm0
+; X86-SKX-NEXT:    retl
+  %wide.load = load <16 x i32>, ptr %arr, align 4
+  %and = and <16 x i32> %wide.load, <i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911, i32 536870911>
+  %zext = zext <16 x i32> %and to <16 x i64>
+  %ptrs = getelementptr inbounds %struct.pt2, ptr %x, <16 x i64> %zext
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_gather_structpt_16f32_mask_index_offset1(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
+; X64-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-KNL:       # %bb.0:
+; X64-KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X64-KNL-NEXT:    vpslld $31, %zmm0, %zmm0
+; X64-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; X64-KNL-NEXT:    vmovdqu64 (%rsi), %zmm0
+; X64-KNL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; X64-KNL-NEXT:    vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
+; X64-KNL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3
+; X64-KNL-NEXT:    kshiftrw $8, %k1, %k2
+; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm0,8), %ymm3 {%k2}
+; X64-KNL-NEXT:    vgatherqps 4(%rdi,%zmm2,8), %ymm1 {%k1}
+; X64-KNL-NEXT:    vinsertf64x4 $1, %ymm3, %zmm1, %zmm0
+; X64-KNL-NEXT:    retq
+;
+; X86-KNL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X86-KNL:       # %bb.0:
+; X86-KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X86-KNL-NEXT:    vpslld $31, %zmm0, %zmm0
+; X86-KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-KNL-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-KNL-NEXT:    vmovups (%ecx), %zmm0
+; X86-KNL-NEXT:    vgatherdps 4(%eax,%zmm0,8), %zmm1 {%k1}
+; X86-KNL-NEXT:    vmovaps %zmm1, %zmm0
+; X86-KNL-NEXT:    retl
+;
+; X64-SKX-SMALL-LABEL: test_gather_structpt_16f32_mask_index_offset1:
+; X64-SKX-SMALL:       # %bb.0:
+; X64-SKX-SMALL-NEXT:    vpmovsxbd %xmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vpslld $31, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vpmovd2m %zmm0, %k1
+; X64-SKX-SMALL-NEXT:    vmovdqu64 (%rsi), %zmm0
+; X64-SKX-SMALL-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
+; X64-SKX-SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm2 = ymm0[0],ze...
[truncated]

rohitaggarwal007 (Contributor, Author)

@RKSimon I have created this PR for the new test cases in #134979. Please review.

RKSimon self-requested a review April 28, 2025 09:25
ret <16 x float> %res
}

define {<16 x float>, <16 x float>} @test_gather_16f32_mask_index_pair2(ptr %x, ptr %arr, <16 x i1> %mask, <16 x float> %src0) {
Collaborator:

test_gather_structpt2_16f32_mask_index_pair2 ?

Contributor (Author):

Sure, will fix.

ret {<16 x float>, <16 x float>} %pair2
}

define <8 x float> @test_gather_structpt_8f32_mask_index(ptr %x, ptr %arr, <8 x i1> %mask, <8 x float> %src0) {
Collaborator:

Maybe rename these tests to test_gather_structpt2_* to make it clear they use the %struct.pt2 aggregate?

Contributor (Author):

Sure.

rohitaggarwal007 (Contributor, Author)

@RKSimon, I have updated the test case names.

RKSimon changed the title from "Add the new test cases for gather scalar" to "[X86] Add new #134979 test cases for gather scalar" on Apr 29, 2025
RKSimon (Collaborator) left a comment:

LGTM - make sure the commit message explains these new tests are for #134979.

rohitaggarwal007 (Contributor, Author)

@RKSimon, I don't have write permission, so I can't see the merge button. Could you please merge on my behalf?

RKSimon merged commit 2466100 into llvm:main Apr 29, 2025 (11 checks passed).

rohitaggarwal007 deleted the MaskedtestCase branch April 29, 2025 13:14.
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025

GeorgeARM pushed a commit to GeorgeARM/llvm-project that referenced this pull request May 7, 2025

Ankur-0429 pushed a commit to Ankur-0429/llvm-project that referenced this pull request May 9, 2025