|
4 | 4 |
|
5 | 5 | ; Test with more than four live mask pairs
|
6 | 6 |
|
7 |
| -define void @test(<16 x i32> %a0, <16 x i32> %b0, |
8 |
| - <16 x i32> %a1, <16 x i32> %b1, |
9 |
| - <16 x i32> %a2, <16 x i32> %b2, |
10 |
| - <16 x i32> %a3, <16 x i32> %b3, |
11 |
| - <16 x i32> %a4, <16 x i32> %b4, |
12 |
| - i16* nocapture %m0, i16* nocapture %m1) { |
| 7 | +define void @test(<16 x i32> %a0, <16 x i32> %b0, <16 x i32> %a1, <16 x i32> %b1, <16 x i32> %a2, <16 x i32> %b2, <16 x i32> %a3, <16 x i32> %b3, <16 x i32> %a4, <16 x i32> %b4, i16* nocapture %m0, i16* nocapture %m1) nounwind { |
13 | 8 | ; X86-LABEL: test:
|
14 | 9 | ; X86: # %bb.0: # %entry
|
15 | 10 | ; X86-NEXT: pushl %ebp
|
16 |
| -; X86-NEXT: .cfi_def_cfa_offset 8 |
17 |
| -; X86-NEXT: .cfi_offset %ebp, -8 |
18 | 11 | ; X86-NEXT: movl %esp, %ebp
|
19 |
| -; X86-NEXT: .cfi_def_cfa_register %ebp |
20 | 12 | ; X86-NEXT: pushl %edi
|
21 | 13 | ; X86-NEXT: pushl %esi
|
22 | 14 | ; X86-NEXT: andl $-64, %esp
|
23 | 15 | ; X86-NEXT: subl $64, %esp
|
24 |
| -; X86-NEXT: .cfi_offset %esi, -16 |
25 |
| -; X86-NEXT: .cfi_offset %edi, -12 |
26 | 16 | ; X86-NEXT: movl 456(%ebp), %esi
|
27 | 17 | ; X86-NEXT: vmovaps 328(%ebp), %zmm3
|
28 | 18 | ; X86-NEXT: vmovaps 200(%ebp), %zmm4
|
@@ -62,20 +52,24 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0,
|
62 | 52 | ; X86-NEXT: kmovw %k2, %edi
|
63 | 53 | ; X86-NEXT: addl %ecx, %edx
|
64 | 54 | ; X86-NEXT: kmovw %k1, %ecx
|
| 55 | +; X86-NEXT: addl %edi, %ecx |
| 56 | +; X86-NEXT: addl %eax, %ecx |
| 57 | +; X86-NEXT: addl %edx, %ecx |
| 58 | +; X86-NEXT: movw %cx, (%esi) |
| 59 | +; X86-NEXT: leal -8(%ebp), %esp |
| 60 | +; X86-NEXT: popl %esi |
| 61 | +; X86-NEXT: popl %edi |
| 62 | +; X86-NEXT: popl %ebp |
| 63 | +; X86-NEXT: retl |
65 | 64 | ;
|
66 | 65 | ; X64-LABEL: test:
|
67 | 66 | ; X64: # %bb.0: # %entry
|
68 | 67 | ; X64-NEXT: pushq %rbp
|
69 |
| -; X64-NEXT: .cfi_def_cfa_offset 16 |
70 |
| -; X64-NEXT: .cfi_offset %rbp, -16 |
71 | 68 | ; X64-NEXT: movq %rsp, %rbp
|
72 |
| -; X64-NEXT: .cfi_def_cfa_register %rbp |
73 | 69 | ; X64-NEXT: pushq %r14
|
74 | 70 | ; X64-NEXT: pushq %rbx
|
75 | 71 | ; X64-NEXT: andq $-64, %rsp
|
76 | 72 | ; X64-NEXT: subq $64, %rsp
|
77 |
| -; X64-NEXT: .cfi_offset %rbx, -32 |
78 |
| -; X64-NEXT: .cfi_offset %r14, -24 |
79 | 73 | ; X64-NEXT: movq %rdi, %r14
|
80 | 74 | ; X64-NEXT: vmovaps 16(%rbp), %zmm8
|
81 | 75 | ; X64-NEXT: vp2intersectd %zmm1, %zmm0, %k0
|
@@ -111,6 +105,17 @@ define void @test(<16 x i32> %a0, <16 x i32> %b0,
|
111 | 105 | ; X64-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
|
112 | 106 | ; X64-NEXT: kmovw %k0, %edi
|
113 | 107 | ; X64-NEXT: kmovw %k1, %ebx
|
| 108 | +; X64-NEXT: addl %edi, %eax |
| 109 | +; X64-NEXT: addl %ecx, %edx |
| 110 | +; X64-NEXT: leal (%rbx,%rsi), %ecx |
| 111 | +; X64-NEXT: addl %eax, %ecx |
| 112 | +; X64-NEXT: addl %edx, %ecx |
| 113 | +; X64-NEXT: movw %cx, (%r14) |
| 114 | +; X64-NEXT: leaq -16(%rbp), %rsp |
| 115 | +; X64-NEXT: popq %rbx |
| 116 | +; X64-NEXT: popq %r14 |
| 117 | +; X64-NEXT: popq %rbp |
| 118 | +; X64-NEXT: retq |
114 | 119 | entry:
|
115 | 120 | %0 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a0, <16 x i32> %b0)
|
116 | 121 | %1 = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %a1, <16 x i32> %b1)
|
|
0 commit comments