@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s
 
-define <16 x i8> @load_v3i8(ptr %src, ptr %dst) {
+define <16 x i8> @load_v3i8(ptr %src) {
 ; CHECK-LABEL: load_v3i8:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #16
@@ -44,7 +44,7 @@ define <16 x i8> @load_v3i8(ptr %src, ptr %dst) {
   ret <16 x i8> %s
 }
 
-define <4 x i32> @load_v3i8_to_4xi32(ptr %src, ptr %dst) {
+define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
 ; CHECK-LABEL: load_v3i8_to_4xi32:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #16
@@ -87,7 +87,95 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src, ptr %dst) {
   ret <4 x i32> %e
 }
 
-define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src, ptr %dst) {
+define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
+; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldurh w8, [x0, #1]
+; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
+; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldrsb w8, [x0, #3]
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: mov.h v0[1], v0[1]
+; CHECK-NEXT: mov.h v0[2], w8
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+;
+; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
+; BE: // %bb.0:
+; BE-NEXT: sub sp, sp, #16
+; BE-NEXT: .cfi_def_cfa_offset 16
+; BE-NEXT: ldurh w8, [x0, #1]
+; BE-NEXT: movi v1.2d, #0x0000ff000000ff
+; BE-NEXT: strh w8, [sp, #12]
+; BE-NEXT: ldr s0, [sp, #12]
+; BE-NEXT: ldrsb w8, [x0, #3]
+; BE-NEXT: rev32 v0.8b, v0.8b
+; BE-NEXT: ushll v0.8h, v0.8b, #0
+; BE-NEXT: mov v0.h[1], v0.h[1]
+; BE-NEXT: mov v0.h[2], w8
+; BE-NEXT: ushll v0.4s, v0.4h, #0
+; BE-NEXT: and v0.16b, v0.16b, v1.16b
+; BE-NEXT: rev64 v0.4s, v0.4s
+; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT: add sp, sp, #16
+; BE-NEXT: ret
+  %src.1 = getelementptr inbounds i8, ptr %src, i64 1
+  %l = load <3 x i8>, ptr %src.1, align 1
+  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
+  %e = zext <4 x i8> %s to <4 x i32>
+  ret <4 x i32> %e
+}
+
+define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
+; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldurh w8, [x0, #3]
+; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
+; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldrsb w8, [x0, #5]
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: mov.h v0[1], v0[1]
+; CHECK-NEXT: mov.h v0[2], w8
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+;
+; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
+; BE: // %bb.0:
+; BE-NEXT: sub sp, sp, #16
+; BE-NEXT: .cfi_def_cfa_offset 16
+; BE-NEXT: ldurh w8, [x0, #3]
+; BE-NEXT: movi v1.2d, #0x0000ff000000ff
+; BE-NEXT: strh w8, [sp, #12]
+; BE-NEXT: ldr s0, [sp, #12]
+; BE-NEXT: ldrsb w8, [x0, #5]
+; BE-NEXT: rev32 v0.8b, v0.8b
+; BE-NEXT: ushll v0.8h, v0.8b, #0
+; BE-NEXT: mov v0.h[1], v0.h[1]
+; BE-NEXT: mov v0.h[2], w8
+; BE-NEXT: ushll v0.4s, v0.4h, #0
+; BE-NEXT: and v0.16b, v0.16b, v1.16b
+; BE-NEXT: rev64 v0.4s, v0.4s
+; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; BE-NEXT: add sp, sp, #16
+; BE-NEXT: ret
+  %src.3 = getelementptr inbounds i8, ptr %src, i64 3
+  %l = load <3 x i8>, ptr %src.3, align 1
+  %s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
+  %e = zext <4 x i8> %s to <4 x i32>
+  ret <4 x i32> %e
+}
+
+define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
 ; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #16
@@ -271,6 +359,84 @@ define void @shift_trunc_store(ptr %src, ptr %dst) {
   ret void
 }
 
+define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) {
+; CHECK-LABEL: shift_trunc_store_const_offset_1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: shrn.4h v0, v0, #16
+; CHECK-NEXT: xtn.8b v1, v0
+; CHECK-NEXT: umov.h w8, v0[2]
+; CHECK-NEXT: str s1, [sp, #12]
+; CHECK-NEXT: ldrh w9, [sp, #12]
+; CHECK-NEXT: strb w8, [x1, #3]
+; CHECK-NEXT: sturh w9, [x1, #1]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+;
+; BE-LABEL: shift_trunc_store_const_offset_1:
+; BE: // %bb.0:
+; BE-NEXT: sub sp, sp, #16
+; BE-NEXT: .cfi_def_cfa_offset 16
+; BE-NEXT: ld1 { v0.4s }, [x0]
+; BE-NEXT: shrn v0.4h, v0.4s, #16
+; BE-NEXT: xtn v1.8b, v0.8h
+; BE-NEXT: umov w8, v0.h[2]
+; BE-NEXT: rev32 v1.16b, v1.16b
+; BE-NEXT: str s1, [sp, #12]
+; BE-NEXT: ldrh w9, [sp, #12]
+; BE-NEXT: strb w8, [x1, #3]
+; BE-NEXT: sturh w9, [x1, #1]
+; BE-NEXT: add sp, sp, #16
+; BE-NEXT: ret
+  %l = load <3 x i32>, ptr %src
+  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
+  %t = trunc <3 x i32> %s to <3 x i8>
+  %dst.1 = getelementptr inbounds i8, ptr %dst, i64 1
+  store <3 x i8> %t, ptr %dst.1, align 1
+  ret void
+}
+
+define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) {
+; CHECK-LABEL: shift_trunc_store_const_offset_3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: shrn.4h v0, v0, #16
+; CHECK-NEXT: xtn.8b v1, v0
+; CHECK-NEXT: umov.h w8, v0[2]
+; CHECK-NEXT: str s1, [sp, #12]
+; CHECK-NEXT: ldrh w9, [sp, #12]
+; CHECK-NEXT: strb w8, [x1, #5]
+; CHECK-NEXT: sturh w9, [x1, #3]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+;
+; BE-LABEL: shift_trunc_store_const_offset_3:
+; BE: // %bb.0:
+; BE-NEXT: sub sp, sp, #16
+; BE-NEXT: .cfi_def_cfa_offset 16
+; BE-NEXT: ld1 { v0.4s }, [x0]
+; BE-NEXT: shrn v0.4h, v0.4s, #16
+; BE-NEXT: xtn v1.8b, v0.8h
+; BE-NEXT: umov w8, v0.h[2]
+; BE-NEXT: rev32 v1.16b, v1.16b
+; BE-NEXT: str s1, [sp, #12]
+; BE-NEXT: ldrh w9, [sp, #12]
+; BE-NEXT: strb w8, [x1, #5]
+; BE-NEXT: sturh w9, [x1, #3]
+; BE-NEXT: add sp, sp, #16
+; BE-NEXT: ret
+  %l = load <3 x i32>, ptr %src
+  %s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
+  %t = trunc <3 x i32> %s to <3 x i8>
+  %dst.3 = getelementptr inbounds i8, ptr %dst, i64 3
+  store <3 x i8> %t, ptr %dst.3, align 1
+  ret void
+}
+
 define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
 ; CHECK-LABEL: shift_trunc_volatile_store:
 ; CHECK: ; %bb.0: