5
5
define <16 x i8 > @load_v3i8 (ptr %src ) {
6
6
; CHECK-LABEL: load_v3i8:
7
7
; CHECK: ; %bb.0:
8
- ; CHECK-NEXT: ldrb w8, [x0, #2]
9
- ; CHECK-NEXT: ldrh w9, [x0]
10
- ; CHECK-NEXT: orr w8, w9, w8, lsl #16
11
- ; CHECK-NEXT: fmov s0, w8
8
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2
9
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x0]
12
10
; CHECK-NEXT: ret
13
11
;
14
12
; BE-LABEL: load_v3i8:
@@ -38,12 +36,9 @@ define <16 x i8> @load_v3i8(ptr %src) {
38
36
define <4 x i32 > @load_v3i8_to_4xi32 (ptr %src ) {
39
37
; CHECK-LABEL: load_v3i8_to_4xi32:
40
38
; CHECK: ; %bb.0:
41
- ; CHECK-NEXT: ldrb w8, [x0, #2]
42
- ; CHECK-NEXT: ldrh w9, [x0]
39
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2
43
40
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
44
- ; CHECK-NEXT: orr w8, w9, w8, lsl #16
45
- ; CHECK-NEXT: fmov s0, w8
46
- ; CHECK-NEXT: zip1.8b v0, v0, v0
41
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x0]
47
42
; CHECK-NEXT: ushll.4s v0, v0, #0
48
43
; CHECK-NEXT: and.16b v0, v0, v1
49
44
; CHECK-NEXT: ret
@@ -59,7 +54,6 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
59
54
; BE-NEXT: ldrsb w8, [x0, #2]
60
55
; BE-NEXT: rev32 v0.8b, v0.8b
61
56
; BE-NEXT: ushll v0.8h, v0.8b, #0
62
- ; BE-NEXT: mov v0.h[1], v0.h[1]
63
57
; BE-NEXT: mov v0.h[2], w8
64
58
; BE-NEXT: ushll v0.4s, v0.4h, #0
65
59
; BE-NEXT: and v0.16b, v0.16b, v1.16b
@@ -76,12 +70,9 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {
76
70
define <4 x i32 > @load_v3i8_to_4xi32_align_2 (ptr %src ) {
77
71
; CHECK-LABEL: load_v3i8_to_4xi32_align_2:
78
72
; CHECK: ; %bb.0:
79
- ; CHECK-NEXT: ldrb w8, [x0, #2]
80
- ; CHECK-NEXT: ldrh w9, [x0]
73
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2
81
74
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
82
- ; CHECK-NEXT: orr w8, w9, w8, lsl #16
83
- ; CHECK-NEXT: fmov s0, w8
84
- ; CHECK-NEXT: zip1.8b v0, v0, v0
75
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x0]
85
76
; CHECK-NEXT: ushll.4s v0, v0, #0
86
77
; CHECK-NEXT: and.16b v0, v0, v1
87
78
; CHECK-NEXT: ret
@@ -97,7 +88,6 @@ define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {
97
88
; BE-NEXT: ldrsb w8, [x0, #2]
98
89
; BE-NEXT: rev32 v0.8b, v0.8b
99
90
; BE-NEXT: ushll v0.8h, v0.8b, #0
100
- ; BE-NEXT: mov v0.h[1], v0.h[1]
101
91
; BE-NEXT: mov v0.h[2], w8
102
92
; BE-NEXT: ushll v0.4s, v0.4h, #0
103
93
; BE-NEXT: and v0.16b, v0.16b, v1.16b
@@ -141,12 +131,11 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {
141
131
define <4 x i32 > @load_v3i8_to_4xi32_const_offset_1 (ptr %src ) {
142
132
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:
143
133
; CHECK: ; %bb.0:
144
- ; CHECK-NEXT: ldrb w8, [x0, #3]
145
- ; CHECK-NEXT: ldurh w9, [x0, #1]
134
+ ; CHECK-NEXT: add x8, x0, #1
146
135
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
147
- ; CHECK-NEXT: orr w8, w9, w8, lsl #16
148
- ; CHECK-NEXT: fmov s0, w8
149
- ; CHECK-NEXT: zip1.8b v0, v0, v0
136
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x8]
137
+ ; CHECK-NEXT: add x8, x0, #3
138
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x8]
150
139
; CHECK-NEXT: ushll.4s v0, v0, #0
151
140
; CHECK-NEXT: and.16b v0, v0, v1
152
141
; CHECK-NEXT: ret
@@ -162,7 +151,6 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
162
151
; BE-NEXT: ldrsb w8, [x0, #3]
163
152
; BE-NEXT: rev32 v0.8b, v0.8b
164
153
; BE-NEXT: ushll v0.8h, v0.8b, #0
165
- ; BE-NEXT: mov v0.h[1], v0.h[1]
166
154
; BE-NEXT: mov v0.h[2], w8
167
155
; BE-NEXT: ushll v0.4s, v0.4h, #0
168
156
; BE-NEXT: and v0.16b, v0.16b, v1.16b
@@ -180,12 +168,11 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {
180
168
define <4 x i32 > @load_v3i8_to_4xi32_const_offset_3 (ptr %src ) {
181
169
; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:
182
170
; CHECK: ; %bb.0:
183
- ; CHECK-NEXT: ldrb w8, [x0, #5]
184
- ; CHECK-NEXT: ldurh w9, [x0, #3]
171
+ ; CHECK-NEXT: add x8, x0, #3
185
172
; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
186
- ; CHECK-NEXT: orr w8, w9, w8, lsl #16
187
- ; CHECK-NEXT: fmov s0, w8
188
- ; CHECK-NEXT: zip1.8b v0, v0, v0
173
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x8]
174
+ ; CHECK-NEXT: add x8, x0, #5
175
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x8]
189
176
; CHECK-NEXT: ushll.4s v0, v0, #0
190
177
; CHECK-NEXT: and.16b v0, v0, v1
191
178
; CHECK-NEXT: ret
@@ -201,7 +188,6 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {
201
188
; BE-NEXT: ldrsb w8, [x0, #5]
202
189
; BE-NEXT: rev32 v0.8b, v0.8b
203
190
; BE-NEXT: ushll v0.8h, v0.8b, #0
204
- ; BE-NEXT: mov v0.h[1], v0.h[1]
205
191
; BE-NEXT: mov v0.h[2], w8
206
192
; BE-NEXT: ushll v0.4s, v0.4h, #0
207
193
; BE-NEXT: and v0.16b, v0.16b, v1.16b
@@ -263,7 +249,6 @@ define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
263
249
; CHECK-NEXT: ldr s0, [sp, #12]
264
250
; CHECK-NEXT: ldrsb w8, [x0, #2]
265
251
; CHECK-NEXT: ushll.8h v0, v0, #0
266
- ; CHECK-NEXT: mov.h v0[1], v0[1]
267
252
; CHECK-NEXT: mov.h v0[2], w8
268
253
; CHECK-NEXT: ushll.4s v0, v0, #0
269
254
; CHECK-NEXT: and.16b v0, v0, v1
@@ -281,7 +266,6 @@ define <4 x i32> @volatile_load_v3i8_to_4xi32(ptr %src) {
281
266
; BE-NEXT: ldrsb w8, [x0, #2]
282
267
; BE-NEXT: rev32 v0.8b, v0.8b
283
268
; BE-NEXT: ushll v0.8h, v0.8b, #0
284
- ; BE-NEXT: mov v0.h[1], v0.h[1]
285
269
; BE-NEXT: mov v0.h[2], w8
286
270
; BE-NEXT: ushll v0.4s, v0.4h, #0
287
271
; BE-NEXT: and v0.16b, v0.16b, v1.16b
@@ -410,12 +394,9 @@ entry:
410
394
define void @load_ext_to_64bits (ptr %src , ptr %dst ) {
411
395
; CHECK-LABEL: load_ext_to_64bits:
412
396
; CHECK: ; %bb.0: ; %entry
413
- ; CHECK-NEXT: ldrb w8, [x0, #2]
414
- ; CHECK-NEXT: ldrh w9, [x0]
415
- ; CHECK-NEXT: orr w8, w9, w8, lsl #16
416
- ; CHECK-NEXT: fmov s0, w8
397
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2
417
398
; CHECK-NEXT: add x8, x1, #4
418
- ; CHECK-NEXT: zip1.8b v0, v0, v0
399
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x0]
419
400
; CHECK-NEXT: bic.4h v0, #255, lsl #8
420
401
; CHECK-NEXT: st1.h { v0 }[2], [x8]
421
402
; CHECK-NEXT: str s0, [x1]
@@ -507,16 +488,13 @@ entry:
507
488
define void @load_ext_add_to_64bits (ptr %src , ptr %dst ) {
508
489
; CHECK-LABEL: load_ext_add_to_64bits:
509
490
; CHECK: ; %bb.0: ; %entry
510
- ; CHECK-NEXT: ldrb w9, [x0, #2]
511
- ; CHECK-NEXT: ldrh w10, [x0]
491
+ ; CHECK-NEXT: ld1r.4h { v0 }, [x0], #2
512
492
; CHECK-NEXT: Lloh2:
513
493
; CHECK-NEXT: adrp x8, lCPI13_0@PAGE
514
494
; CHECK-NEXT: Lloh3:
515
495
; CHECK-NEXT: ldr d1, [x8, lCPI13_0@PAGEOFF]
516
496
; CHECK-NEXT: add x8, x1, #4
517
- ; CHECK-NEXT: orr w9, w10, w9, lsl #16
518
- ; CHECK-NEXT: fmov s0, w9
519
- ; CHECK-NEXT: zip1.8b v0, v0, v0
497
+ ; CHECK-NEXT: ld1.b { v0 }[2], [x0]
520
498
; CHECK-NEXT: bic.4h v0, #255, lsl #8
521
499
; CHECK-NEXT: add.4h v0, v0, v1
522
500
; CHECK-NEXT: st1.h { v0 }[2], [x8]
0 commit comments