|
5 | 5 | define <16 x i8> @load_v3i8(ptr %src) {
|
6 | 6 | ; CHECK-LABEL: load_v3i8:
|
7 | 7 | ; CHECK: ; %bb.0:
|
8 |
| -; CHECK-NEXT: sub sp, sp, #16 |
9 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
10 |
| -; CHECK-NEXT: ldrh w8, [x0] |
11 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
12 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
13 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
14 |
| -; CHECK-NEXT: umov.h w8, v0[0] |
15 |
| -; CHECK-NEXT: umov.h w9, v0[1] |
| 8 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 9 | +; CHECK-NEXT: ldrh w9, [x0] |
| 10 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
16 | 11 | ; CHECK-NEXT: fmov s0, w8
|
17 |
| -; CHECK-NEXT: add x8, x0, #2 |
18 |
| -; CHECK-NEXT: mov.b v0[1], w9 |
19 |
| -; CHECK-NEXT: ld1.b { v0 }[2], [x8] |
20 |
| -; CHECK-NEXT: add sp, sp, #16 |
21 | 12 | ; CHECK-NEXT: ret
|
22 | 13 | ;
|
23 | 14 | ; BE-LABEL: load_v3i8:
|
@@ -47,19 +38,14 @@ define <16 x i8> @load_v3i8(ptr %src) {
|
47 | 38 | define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
|
48 | 39 | ; CHECK-LABEL: load_v3i8_to_4xi32:
|
49 | 40 | ; CHECK: ; %bb.0:
|
50 |
| -; CHECK-NEXT: sub sp, sp, #16 |
51 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
52 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 41 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 42 | +; CHECK-NEXT: ldrh w9, [x0] |
53 | 43 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
54 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
55 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
56 |
| -; CHECK-NEXT: ldrsb w8, [x0, #2] |
57 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
58 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
59 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 44 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 45 | +; CHECK-NEXT: fmov s0, w8 |
| 46 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
60 | 47 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
61 | 48 | ; CHECK-NEXT: and.16b v0, v0, v1
|
62 |
| -; CHECK-NEXT: add sp, sp, #16 |
63 | 49 | ; CHECK-NEXT: ret
|
64 | 50 | ;
|
65 | 51 | ; BE-LABEL: load_v3i8_to_4xi32:
|
@@ -90,19 +76,14 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
|
90 | 76 | define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
|
91 | 77 | ; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
|
92 | 78 | ; CHECK: ; %bb.0:
|
93 |
| -; CHECK-NEXT: sub sp, sp, #16 |
94 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
95 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 79 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 80 | +; CHECK-NEXT: ldrh w9, [x0] |
96 | 81 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
97 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
98 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
99 |
| -; CHECK-NEXT: ldrsb w8, [x0, #2] |
100 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
101 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
102 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 82 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 83 | +; CHECK-NEXT: fmov s0, w8 |
| 84 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
103 | 85 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
104 | 86 | ; CHECK-NEXT: and.16b v0, v0, v1
|
105 |
| -; CHECK-NEXT: add sp, sp, #16 |
106 | 87 | ; CHECK-NEXT: ret
|
107 | 88 | ;
|
108 | 89 | ; BE-LABEL: load_v3i8_to_4xi32_align_2:
|
@@ -161,19 +142,14 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
|
161 | 142 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
|
162 | 143 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
|
163 | 144 | ; CHECK: ; %bb.0:
|
164 |
| -; CHECK-NEXT: sub sp, sp, #16 |
165 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
166 |
| -; CHECK-NEXT: ldurh w8, [x0, #1] |
| 145 | +; CHECK-NEXT: ldrb w8, [x0, #3] |
| 146 | +; CHECK-NEXT: ldurh w9, [x0, #1] |
167 | 147 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
168 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
169 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
170 |
| -; CHECK-NEXT: ldrsb w8, [x0, #3] |
171 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
172 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
173 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 148 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 149 | +; CHECK-NEXT: fmov s0, w8 |
| 150 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
174 | 151 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
175 | 152 | ; CHECK-NEXT: and.16b v0, v0, v1
|
176 |
| -; CHECK-NEXT: add sp, sp, #16 |
177 | 153 | ; CHECK-NEXT: ret
|
178 | 154 | ;
|
179 | 155 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
|
@@ -205,19 +181,14 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
|
205 | 181 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
|
206 | 182 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
|
207 | 183 | ; CHECK: ; %bb.0:
|
208 |
| -; CHECK-NEXT: sub sp, sp, #16 |
209 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
210 |
| -; CHECK-NEXT: ldurh w8, [x0, #3] |
| 184 | +; CHECK-NEXT: ldrb w8, [x0, #5] |
| 185 | +; CHECK-NEXT: ldurh w9, [x0, #3] |
211 | 186 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
212 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
213 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
214 |
| -; CHECK-NEXT: ldrsb w8, [x0, #5] |
215 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
216 |
| -; CHECK-NEXT: mov.h v0[1], v0[1] |
217 |
| -; CHECK-NEXT: mov.h v0[2], w8 |
| 187 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 188 | +; CHECK-NEXT: fmov s0, w8 |
| 189 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
218 | 190 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
219 | 191 | ; CHECK-NEXT: and.16b v0, v0, v1
|
220 |
| -; CHECK-NEXT: add sp, sp, #16 |
221 | 192 | ; CHECK-NEXT: ret
|
222 | 193 | ;
|
223 | 194 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
|
@@ -349,18 +320,14 @@ define <3 x i32> @load_v3i32(ptr %src) {
|
349 | 320 | define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
|
350 | 321 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32:
|
351 | 322 | ; CHECK: ; %bb.0:
|
352 |
| -; CHECK-NEXT: sub sp, sp, #16 |
353 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
354 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 323 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 324 | +; CHECK-NEXT: ldrh w9, [x0] |
355 | 325 | ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
|
356 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
357 |
| -; CHECK-NEXT: add x8, x0, #2 |
358 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
359 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
360 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 326 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 327 | +; CHECK-NEXT: fmov s0, w8 |
| 328 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
361 | 329 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
362 | 330 | ; CHECK-NEXT: and.16b v0, v0, v1
|
363 |
| -; CHECK-NEXT: add sp, sp, #16 |
364 | 331 | ; CHECK-NEXT: ret
|
365 | 332 | ;
|
366 | 333 | ; BE-LABEL: load_v3i8_zext_to_3xi32:
|
@@ -389,18 +356,14 @@ define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {
|
389 | 356 | define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) {
|
390 | 357 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32:
|
391 | 358 | ; CHECK: ; %bb.0:
|
392 |
| -; CHECK-NEXT: sub sp, sp, #16 |
393 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
394 |
| -; CHECK-NEXT: ldrh w8, [x0] |
395 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
396 |
| -; CHECK-NEXT: add x8, x0, #2 |
397 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
398 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
399 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 359 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 360 | +; CHECK-NEXT: ldrh w9, [x0] |
| 361 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 362 | +; CHECK-NEXT: fmov s0, w8 |
| 363 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
400 | 364 | ; CHECK-NEXT: ushll.4s v0, v0, #0
|
401 | 365 | ; CHECK-NEXT: shl.4s v0, v0, #24
|
402 | 366 | ; CHECK-NEXT: sshr.4s v0, v0, #24
|
403 |
| -; CHECK-NEXT: add sp, sp, #16 |
404 | 367 | ; CHECK-NEXT: ret
|
405 | 368 | ;
|
406 | 369 | ; BE-LABEL: load_v3i8_sext_to_3xi32:
|
@@ -514,19 +477,15 @@ entry:
|
514 | 477 | define void @load_ext_to_64bits(ptr %src, ptr %dst) {
|
515 | 478 | ; CHECK-LABEL: load_ext_to_64bits:
|
516 | 479 | ; CHECK: ; %bb.0: ; %entry
|
517 |
| -; CHECK-NEXT: sub sp, sp, #16 |
518 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
519 |
| -; CHECK-NEXT: ldrh w8, [x0] |
520 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
521 |
| -; CHECK-NEXT: add x8, x0, #2 |
522 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
523 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
524 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 480 | +; CHECK-NEXT: ldrb w9, [x0, #2] |
525 | 481 | ; CHECK-NEXT: add x8, x1, #4
|
| 482 | +; CHECK-NEXT: ldrh w10, [x0] |
| 483 | +; CHECK-NEXT: orr w9, w10, w9, lsl #16 |
| 484 | +; CHECK-NEXT: fmov s0, w9 |
| 485 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
526 | 486 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8
|
527 | 487 | ; CHECK-NEXT: st1.h { v0 }[2], [x8]
|
528 | 488 | ; CHECK-NEXT: str s0, [x1]
|
529 |
| -; CHECK-NEXT: add sp, sp, #16 |
530 | 489 | ; CHECK-NEXT: ret
|
531 | 490 | ;
|
532 | 491 | ; BE-LABEL: load_ext_to_64bits:
|
@@ -617,24 +576,20 @@ entry:
|
617 | 576 | define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
|
618 | 577 | ; CHECK-LABEL: load_ext_add_to_64bits:
|
619 | 578 | ; CHECK: ; %bb.0: ; %entry
|
620 |
| -; CHECK-NEXT: sub sp, sp, #16 |
621 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
622 |
| -; CHECK-NEXT: ldrh w8, [x0] |
| 579 | +; CHECK-NEXT: ldrb w8, [x0, #2] |
| 580 | +; CHECK-NEXT: ldrh w9, [x0] |
| 581 | +; CHECK-NEXT: orr w8, w9, w8, lsl #16 |
| 582 | +; CHECK-NEXT: fmov s0, w8 |
623 | 583 | ; CHECK-NEXT: Lloh2:
|
624 |
| -; CHECK-NEXT: adrp x9, lCPI15_0@PAGE |
625 |
| -; CHECK-NEXT: strh w8, [sp, #12] |
626 |
| -; CHECK-NEXT: add x8, x0, #2 |
627 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
| 584 | +; CHECK-NEXT: adrp x8, lCPI15_0@PAGE |
| 585 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
628 | 586 | ; CHECK-NEXT: Lloh3:
|
629 |
| -; CHECK-NEXT: ldr d1, [x9, lCPI15_0@PAGEOFF] |
630 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
631 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x8] |
| 587 | +; CHECK-NEXT: ldr d1, [x8, lCPI15_0@PAGEOFF] |
632 | 588 | ; CHECK-NEXT: add x8, x1, #4
|
633 | 589 | ; CHECK-NEXT: bic.4h v0, #255, lsl #8
|
634 | 590 | ; CHECK-NEXT: add.4h v0, v0, v1
|
635 | 591 | ; CHECK-NEXT: st1.h { v0 }[2], [x8]
|
636 | 592 | ; CHECK-NEXT: str s0, [x1]
|
637 |
| -; CHECK-NEXT: add sp, sp, #16 |
638 | 593 | ; CHECK-NEXT: ret
|
639 | 594 | ; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
|
640 | 595 | ;
|
@@ -883,24 +838,21 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
|
883 | 838 | define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
|
884 | 839 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
|
885 | 840 | ; CHECK: ; %bb.0:
|
886 |
| -; CHECK-NEXT: sub sp, sp, #16 |
887 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
888 |
| -; CHECK-NEXT: ldrh w9, [x0] |
| 841 | +; CHECK-NEXT: ldrb w10, [x0, #2] |
889 | 842 | ; CHECK-NEXT: Lloh4:
|
890 | 843 | ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
|
891 |
| -; CHECK-NEXT: strh w9, [sp, #12] |
| 844 | +; CHECK-NEXT: ldrh w11, [x0] |
892 | 845 | ; CHECK-NEXT: add x9, x0, #2
|
893 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
894 | 846 | ; CHECK-NEXT: Lloh5:
|
895 | 847 | ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
|
896 | 848 | ; CHECK-NEXT: add x8, x0, #1
|
897 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
898 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 849 | +; CHECK-NEXT: orr w10, w11, w10, lsl #16 |
| 850 | +; CHECK-NEXT: fmov s0, w10 |
| 851 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
899 | 852 | ; CHECK-NEXT: uaddw.4s v0, v1, v0
|
900 |
| -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
901 | 853 | ; CHECK-NEXT: st1.b { v0 }[8], [x9]
|
902 | 854 | ; CHECK-NEXT: st1.b { v0 }[0], [x0]
|
903 |
| -; CHECK-NEXT: add sp, sp, #16 |
| 855 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
904 | 856 | ; CHECK-NEXT: ret
|
905 | 857 | ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
|
906 | 858 | ;
|
@@ -939,24 +891,21 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {
|
939 | 891 | define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) {
|
940 | 892 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
|
941 | 893 | ; CHECK: ; %bb.0:
|
942 |
| -; CHECK-NEXT: sub sp, sp, #16 |
943 |
| -; CHECK-NEXT: .cfi_def_cfa_offset 16 |
944 |
| -; CHECK-NEXT: ldrh w9, [x0] |
| 894 | +; CHECK-NEXT: ldrb w10, [x0, #2] |
945 | 895 | ; CHECK-NEXT: Lloh6:
|
946 | 896 | ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
|
947 |
| -; CHECK-NEXT: strh w9, [sp, #12] |
| 897 | +; CHECK-NEXT: ldrh w11, [x0] |
948 | 898 | ; CHECK-NEXT: add x9, x0, #2
|
949 |
| -; CHECK-NEXT: ldr s0, [sp, #12] |
950 | 899 | ; CHECK-NEXT: Lloh7:
|
951 | 900 | ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
|
952 | 901 | ; CHECK-NEXT: add x8, x0, #1
|
953 |
| -; CHECK-NEXT: ushll.8h v0, v0, #0 |
954 |
| -; CHECK-NEXT: ld1.b { v0 }[4], [x9] |
| 902 | +; CHECK-NEXT: orr w10, w11, w10, lsl #16 |
| 903 | +; CHECK-NEXT: fmov s0, w10 |
| 904 | +; CHECK-NEXT: zip1.8b v0, v0, v0 |
955 | 905 | ; CHECK-NEXT: uaddw.4s v0, v1, v0
|
956 |
| -; CHECK-NEXT: st1.b { v0 }[4], [x8] |
957 | 906 | ; CHECK-NEXT: st1.b { v0 }[8], [x9]
|
958 | 907 | ; CHECK-NEXT: st1.b { v0 }[0], [x0]
|
959 |
| -; CHECK-NEXT: add sp, sp, #16 |
| 908 | +; CHECK-NEXT: st1.b { v0 }[4], [x8] |
960 | 909 | ; CHECK-NEXT: ret
|
961 | 910 | ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
|
962 | 911 | ;
|
|
0 commit comments