Skip to content

Commit ff1cde5

Browse files
committed
[AArch64] Add vec3 load/store tests with GEPs with const offsets.
Extra tests for #78637 #78632
1 parent 3440466 commit ff1cde5

File tree

1 file changed

+169
-3
lines changed

1 file changed

+169
-3
lines changed

llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll

Lines changed: 169 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: llc -mtriple=arm64-apple-macosx -o - %s | FileCheck %s
33
; RUN: llc -mtriple=aarch64_be -o - %s | FileCheck --check-prefix BE %s
44

5-
define <16 x i8> @load_v3i8(ptr %src, ptr %dst) {
5+
define <16 x i8> @load_v3i8(ptr %src) {
66
; CHECK-LABEL: load_v3i8:
77
; CHECK: ; %bb.0:
88
; CHECK-NEXT: sub sp, sp, #16
@@ -44,7 +44,7 @@ define <16 x i8> @load_v3i8(ptr %src, ptr %dst) {
4444
ret <16 x i8> %s
4545
}
4646

47-
define <4 x i32> @load_v3i8_to_4xi32(ptr %src, ptr %dst) {
47+
define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
4848
; CHECK-LABEL: load_v3i8_to_4xi32:
4949
; CHECK: ; %bb.0:
5050
; CHECK-NEXT: sub sp, sp, #16
@@ -87,7 +87,95 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src, ptr %dst) {
8787
ret <4 x i32> %e
8888
}
8989

90-
define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src, ptr %dst) {
90+
; Zero-extending <3 x i8> load from %src + 1 (constant GEP offset, align 1).
; The loaded vector is widened to <4 x i8> by a shufflevector whose last lane
; is undef, and the result is zero-extended to <4 x i32>.
; The expected output for both check prefixes (arm64-apple-macosx and
; aarch64_be) carries the constant offset directly in the load addresses:
; ldurh at [x0, #1] and ldrsb at [x0, #3].
define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
91+
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
92+
; CHECK: ; %bb.0:
93+
; CHECK-NEXT: sub sp, sp, #16
94+
; CHECK-NEXT: .cfi_def_cfa_offset 16
95+
; CHECK-NEXT: ldurh w8, [x0, #1]
96+
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
97+
; CHECK-NEXT: strh w8, [sp, #12]
98+
; CHECK-NEXT: ldr s0, [sp, #12]
99+
; CHECK-NEXT: ldrsb w8, [x0, #3]
100+
; CHECK-NEXT: ushll.8h v0, v0, #0
101+
; CHECK-NEXT: mov.h v0[1], v0[1]
102+
; CHECK-NEXT: mov.h v0[2], w8
103+
; CHECK-NEXT: ushll.4s v0, v0, #0
104+
; CHECK-NEXT: and.16b v0, v0, v1
105+
; CHECK-NEXT: add sp, sp, #16
106+
; CHECK-NEXT: ret
107+
;
108+
; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:
109+
; BE: // %bb.0:
110+
; BE-NEXT: sub sp, sp, #16
111+
; BE-NEXT: .cfi_def_cfa_offset 16
112+
; BE-NEXT: ldurh w8, [x0, #1]
113+
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
114+
; BE-NEXT: strh w8, [sp, #12]
115+
; BE-NEXT: ldr s0, [sp, #12]
116+
; BE-NEXT: ldrsb w8, [x0, #3]
117+
; BE-NEXT: rev32 v0.8b, v0.8b
118+
; BE-NEXT: ushll v0.8h, v0.8b, #0
119+
; BE-NEXT: mov v0.h[1], v0.h[1]
120+
; BE-NEXT: mov v0.h[2], w8
121+
; BE-NEXT: ushll v0.4s, v0.4h, #0
122+
; BE-NEXT: and v0.16b, v0.16b, v1.16b
123+
; BE-NEXT: rev64 v0.4s, v0.4s
124+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
125+
; BE-NEXT: add sp, sp, #16
126+
; BE-NEXT: ret
127+
%src.1 = getelementptr inbounds i8, ptr %src, i64 1
128+
%l = load <3 x i8>, ptr %src.1, align 1
129+
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
130+
%e = zext <4 x i8> %s to <4 x i32>
131+
ret <4 x i32> %e
132+
}
133+
134+
; Zero-extending <3 x i8> load from %src + 3 (constant GEP offset, align 1).
; Same IR shape as the offset-1 variant: the loaded vector is widened to
; <4 x i8> by a shufflevector whose last lane is undef, then zero-extended to
; <4 x i32>.
; The expected output for both check prefixes carries the constant offset
; directly in the load addresses: ldurh at [x0, #3] and ldrsb at [x0, #5].
define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
135+
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
136+
; CHECK: ; %bb.0:
137+
; CHECK-NEXT: sub sp, sp, #16
138+
; CHECK-NEXT: .cfi_def_cfa_offset 16
139+
; CHECK-NEXT: ldurh w8, [x0, #3]
140+
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
141+
; CHECK-NEXT: strh w8, [sp, #12]
142+
; CHECK-NEXT: ldr s0, [sp, #12]
143+
; CHECK-NEXT: ldrsb w8, [x0, #5]
144+
; CHECK-NEXT: ushll.8h v0, v0, #0
145+
; CHECK-NEXT: mov.h v0[1], v0[1]
146+
; CHECK-NEXT: mov.h v0[2], w8
147+
; CHECK-NEXT: ushll.4s v0, v0, #0
148+
; CHECK-NEXT: and.16b v0, v0, v1
149+
; CHECK-NEXT: add sp, sp, #16
150+
; CHECK-NEXT: ret
151+
;
152+
; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:
153+
; BE: // %bb.0:
154+
; BE-NEXT: sub sp, sp, #16
155+
; BE-NEXT: .cfi_def_cfa_offset 16
156+
; BE-NEXT: ldurh w8, [x0, #3]
157+
; BE-NEXT: movi v1.2d, #0x0000ff000000ff
158+
; BE-NEXT: strh w8, [sp, #12]
159+
; BE-NEXT: ldr s0, [sp, #12]
160+
; BE-NEXT: ldrsb w8, [x0, #5]
161+
; BE-NEXT: rev32 v0.8b, v0.8b
162+
; BE-NEXT: ushll v0.8h, v0.8b, #0
163+
; BE-NEXT: mov v0.h[1], v0.h[1]
164+
; BE-NEXT: mov v0.h[2], w8
165+
; BE-NEXT: ushll v0.4s, v0.4h, #0
166+
; BE-NEXT: and v0.16b, v0.16b, v1.16b
167+
; BE-NEXT: rev64 v0.4s, v0.4s
168+
; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
169+
; BE-NEXT: add sp, sp, #16
170+
; BE-NEXT: ret
171+
%src.3 = getelementptr inbounds i8, ptr %src, i64 3
172+
%l = load <3 x i8>, ptr %src.3, align 1
173+
%s = shufflevector <3 x i8> poison, <3 x i8> %l, <4 x i32> <i32 3, i32 4, i32 5, i32 undef>
174+
%e = zext <4 x i8> %s to <4 x i32>
175+
ret <4 x i32> %e
176+
}
177+
178+
define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
91179
; CHECK-LABEL: volatile_load_v3i8_to_4xi32:
92180
; CHECK: ; %bb.0:
93181
; CHECK-NEXT: sub sp, sp, #16
@@ -271,6 +359,84 @@ define void @shift_trunc_store(ptr %src, ptr %dst) {
271359
ret void
272360
}
273361

362+
; Truncating <3 x i8> store to %dst + 1 (constant GEP offset, align 1).
; A <3 x i32> vector is loaded from %src, each lane is logically shifted
; right by 16, and the result is truncated to <3 x i8> before being stored.
; The expected output for both check prefixes (arm64-apple-macosx and
; aarch64_be) carries the constant offset directly in the store addresses:
; strb at [x1, #3] and sturh at [x1, #1].
define void @shift_trunc_store_const_offset_1(ptr %src, ptr %dst) {
363+
; CHECK-LABEL: shift_trunc_store_const_offset_1:
364+
; CHECK: ; %bb.0:
365+
; CHECK-NEXT: sub sp, sp, #16
366+
; CHECK-NEXT: .cfi_def_cfa_offset 16
367+
; CHECK-NEXT: ldr q0, [x0]
368+
; CHECK-NEXT: shrn.4h v0, v0, #16
369+
; CHECK-NEXT: xtn.8b v1, v0
370+
; CHECK-NEXT: umov.h w8, v0[2]
371+
; CHECK-NEXT: str s1, [sp, #12]
372+
; CHECK-NEXT: ldrh w9, [sp, #12]
373+
; CHECK-NEXT: strb w8, [x1, #3]
374+
; CHECK-NEXT: sturh w9, [x1, #1]
375+
; CHECK-NEXT: add sp, sp, #16
376+
; CHECK-NEXT: ret
377+
;
378+
; BE-LABEL: shift_trunc_store_const_offset_1:
379+
; BE: // %bb.0:
380+
; BE-NEXT: sub sp, sp, #16
381+
; BE-NEXT: .cfi_def_cfa_offset 16
382+
; BE-NEXT: ld1 { v0.4s }, [x0]
383+
; BE-NEXT: shrn v0.4h, v0.4s, #16
384+
; BE-NEXT: xtn v1.8b, v0.8h
385+
; BE-NEXT: umov w8, v0.h[2]
386+
; BE-NEXT: rev32 v1.16b, v1.16b
387+
; BE-NEXT: str s1, [sp, #12]
388+
; BE-NEXT: ldrh w9, [sp, #12]
389+
; BE-NEXT: strb w8, [x1, #3]
390+
; BE-NEXT: sturh w9, [x1, #1]
391+
; BE-NEXT: add sp, sp, #16
392+
; BE-NEXT: ret
393+
%l = load <3 x i32>, ptr %src
394+
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
395+
%t = trunc <3 x i32> %s to <3 x i8>
396+
%dst.1 = getelementptr inbounds i8, ptr %dst, i64 1
397+
store <3 x i8> %t, ptr %dst.1, align 1
398+
ret void
399+
}
400+
401+
; Truncating <3 x i8> store to %dst + 3 (constant GEP offset, align 1).
; Same IR shape as the offset-1 variant: a <3 x i32> vector is loaded from
; %src, each lane is logically shifted right by 16, and the result is
; truncated to <3 x i8> before being stored.
; The expected output for both check prefixes carries the constant offset
; directly in the store addresses: strb at [x1, #5] and sturh at [x1, #3].
define void @shift_trunc_store_const_offset_3(ptr %src, ptr %dst) {
402+
; CHECK-LABEL: shift_trunc_store_const_offset_3:
403+
; CHECK: ; %bb.0:
404+
; CHECK-NEXT: sub sp, sp, #16
405+
; CHECK-NEXT: .cfi_def_cfa_offset 16
406+
; CHECK-NEXT: ldr q0, [x0]
407+
; CHECK-NEXT: shrn.4h v0, v0, #16
408+
; CHECK-NEXT: xtn.8b v1, v0
409+
; CHECK-NEXT: umov.h w8, v0[2]
410+
; CHECK-NEXT: str s1, [sp, #12]
411+
; CHECK-NEXT: ldrh w9, [sp, #12]
412+
; CHECK-NEXT: strb w8, [x1, #5]
413+
; CHECK-NEXT: sturh w9, [x1, #3]
414+
; CHECK-NEXT: add sp, sp, #16
415+
; CHECK-NEXT: ret
416+
;
417+
; BE-LABEL: shift_trunc_store_const_offset_3:
418+
; BE: // %bb.0:
419+
; BE-NEXT: sub sp, sp, #16
420+
; BE-NEXT: .cfi_def_cfa_offset 16
421+
; BE-NEXT: ld1 { v0.4s }, [x0]
422+
; BE-NEXT: shrn v0.4h, v0.4s, #16
423+
; BE-NEXT: xtn v1.8b, v0.8h
424+
; BE-NEXT: umov w8, v0.h[2]
425+
; BE-NEXT: rev32 v1.16b, v1.16b
426+
; BE-NEXT: str s1, [sp, #12]
427+
; BE-NEXT: ldrh w9, [sp, #12]
428+
; BE-NEXT: strb w8, [x1, #5]
429+
; BE-NEXT: sturh w9, [x1, #3]
430+
; BE-NEXT: add sp, sp, #16
431+
; BE-NEXT: ret
432+
%l = load <3 x i32>, ptr %src
433+
%s = lshr <3 x i32> %l, <i32 16, i32 16, i32 16>
434+
%t = trunc <3 x i32> %s to <3 x i8>
435+
%dst.3 = getelementptr inbounds i8, ptr %dst, i64 3
436+
store <3 x i8> %t, ptr %dst.3, align 1
437+
ret void
438+
}
439+
274440
define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
275441
; CHECK-LABEL: shift_trunc_volatile_store:
276442
; CHECK: ; %bb.0:

0 commit comments

Comments
 (0)