; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Signed absolute difference of two loaded <8 x i8> vectors: the vabds.v8i8
; intrinsic call is expected to come out as one D-register vabd.s8.
define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

; Signed absolute difference of two loaded <4 x i16> vectors: the vabds.v4i16
; intrinsic call is expected to come out as one D-register vabd.s16.
define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

; Signed absolute difference of two loaded <2 x i32> vectors: the vabds.v2i32
; intrinsic call is expected to come out as one D-register vabd.s32.
define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

; Unsigned absolute difference of two loaded <8 x i8> vectors: the vabdu.v8i8
; intrinsic call is expected to come out as one D-register vabd.u8.
define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

; Unsigned absolute difference of two loaded <4 x i16> vectors: the vabdu.v4i16
; intrinsic call is expected to come out as one D-register vabd.u16.
define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

; Unsigned absolute difference of two loaded <2 x i32> vectors: the vabdu.v2i32
; intrinsic call is expected to come out as one D-register vabd.u32.
define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

; Floating-point absolute difference of two loaded <2 x float> vectors: the
; vabds.v2f32 intrinsic call is expected to come out as one D-register vabd.f32.
define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.f32 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x float>, ptr %A
  %tmp2 = load <2 x float>, ptr %B
  %tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
  ret <2 x float> %tmp3
}

; Signed absolute difference of two loaded <16 x i8> vectors: the vabds.v16i8
; intrinsic call is expected to come out as one Q-register vabd.s8.
define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

; Signed absolute difference of two loaded <8 x i16> vectors: the vabds.v8i16
; intrinsic call is expected to come out as one Q-register vabd.s16.
define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

; Signed absolute difference of two loaded <4 x i32> vectors: the vabds.v4i32
; intrinsic call is expected to come out as one Q-register vabd.s32.
define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

; Unsigned absolute difference of two loaded <16 x i8> vectors: the vabdu.v16i8
; intrinsic call is expected to come out as one Q-register vabd.u8.
define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

; Unsigned absolute difference of two loaded <8 x i16> vectors: the vabdu.v8i16
; intrinsic call is expected to come out as one Q-register vabd.u16.
define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

; Unsigned absolute difference of two loaded <4 x i32> vectors: the vabdu.v4i32
; intrinsic call is expected to come out as one Q-register vabd.u32.
define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <4 x float > @vabdQf32 (ptr %A , ptr %B ) nounwind {
121
- ;CHECK-LABEL: vabdQf32:
122
- ;CHECK: vabd.f32
193
+ ; CHECK-LABEL: vabdQf32:
194
+ ; CHECK: @ %bb.0:
195
+ ; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
196
+ ; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
197
+ ; CHECK-NEXT: vabd.f32 q8, q9, q8
198
+ ; CHECK-NEXT: vmov r0, r1, d16
199
+ ; CHECK-NEXT: vmov r2, r3, d17
200
+ ; CHECK-NEXT: mov pc, lr
123
201
%tmp1 = load <4 x float >, ptr %A
124
202
%tmp2 = load <4 x float >, ptr %B
125
203
%tmp3 = call <4 x float > @llvm.arm.neon.vabds.v4f32 (<4 x float > %tmp1 , <4 x float > %tmp2 )
@@ -147,8 +225,14 @@ declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind read
147
225
declare <4 x float > @llvm.arm.neon.vabds.v4f32 (<4 x float >, <4 x float >) nounwind readnone
148
226
149
227
define <8 x i16 > @vabdls8 (ptr %A , ptr %B ) nounwind {
150
- ;CHECK-LABEL: vabdls8:
151
- ;CHECK: vabdl.s8
228
+ ; CHECK-LABEL: vabdls8:
229
+ ; CHECK: @ %bb.0:
230
+ ; CHECK-NEXT: vldr d16, [r1]
231
+ ; CHECK-NEXT: vldr d17, [r0]
232
+ ; CHECK-NEXT: vabdl.s8 q8, d17, d16
233
+ ; CHECK-NEXT: vmov r0, r1, d16
234
+ ; CHECK-NEXT: vmov r2, r3, d17
235
+ ; CHECK-NEXT: mov pc, lr
152
236
%tmp1 = load <8 x i8 >, ptr %A
153
237
%tmp2 = load <8 x i8 >, ptr %B
154
238
%tmp3 = call <8 x i8 > @llvm.arm.neon.vabds.v8i8 (<8 x i8 > %tmp1 , <8 x i8 > %tmp2 )
@@ -157,8 +241,14 @@ define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
157
241
}
158
242
159
243
define <4 x i32 > @vabdls16 (ptr %A , ptr %B ) nounwind {
160
- ;CHECK-LABEL: vabdls16:
161
- ;CHECK: vabdl.s16
244
+ ; CHECK-LABEL: vabdls16:
245
+ ; CHECK: @ %bb.0:
246
+ ; CHECK-NEXT: vldr d16, [r1]
247
+ ; CHECK-NEXT: vldr d17, [r0]
248
+ ; CHECK-NEXT: vabdl.s16 q8, d17, d16
249
+ ; CHECK-NEXT: vmov r0, r1, d16
250
+ ; CHECK-NEXT: vmov r2, r3, d17
251
+ ; CHECK-NEXT: mov pc, lr
162
252
%tmp1 = load <4 x i16 >, ptr %A
163
253
%tmp2 = load <4 x i16 >, ptr %B
164
254
%tmp3 = call <4 x i16 > @llvm.arm.neon.vabds.v4i16 (<4 x i16 > %tmp1 , <4 x i16 > %tmp2 )
@@ -167,8 +257,14 @@ define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
167
257
}
168
258
169
259
define <2 x i64 > @vabdls32 (ptr %A , ptr %B ) nounwind {
170
- ;CHECK-LABEL: vabdls32:
171
- ;CHECK: vabdl.s32
260
+ ; CHECK-LABEL: vabdls32:
261
+ ; CHECK: @ %bb.0:
262
+ ; CHECK-NEXT: vldr d16, [r1]
263
+ ; CHECK-NEXT: vldr d17, [r0]
264
+ ; CHECK-NEXT: vabdl.s32 q8, d17, d16
265
+ ; CHECK-NEXT: vmov r0, r1, d16
266
+ ; CHECK-NEXT: vmov r2, r3, d17
267
+ ; CHECK-NEXT: mov pc, lr
172
268
%tmp1 = load <2 x i32 >, ptr %A
173
269
%tmp2 = load <2 x i32 >, ptr %B
174
270
%tmp3 = call <2 x i32 > @llvm.arm.neon.vabds.v2i32 (<2 x i32 > %tmp1 , <2 x i32 > %tmp2 )
@@ -177,8 +273,14 @@ define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
177
273
}
178
274
179
275
define <8 x i16 > @vabdlu8 (ptr %A , ptr %B ) nounwind {
180
- ;CHECK-LABEL: vabdlu8:
181
- ;CHECK: vabdl.u8
276
+ ; CHECK-LABEL: vabdlu8:
277
+ ; CHECK: @ %bb.0:
278
+ ; CHECK-NEXT: vldr d16, [r1]
279
+ ; CHECK-NEXT: vldr d17, [r0]
280
+ ; CHECK-NEXT: vabdl.u8 q8, d17, d16
281
+ ; CHECK-NEXT: vmov r0, r1, d16
282
+ ; CHECK-NEXT: vmov r2, r3, d17
283
+ ; CHECK-NEXT: mov pc, lr
182
284
%tmp1 = load <8 x i8 >, ptr %A
183
285
%tmp2 = load <8 x i8 >, ptr %B
184
286
%tmp3 = call <8 x i8 > @llvm.arm.neon.vabdu.v8i8 (<8 x i8 > %tmp1 , <8 x i8 > %tmp2 )
@@ -187,8 +289,14 @@ define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
187
289
}
188
290
189
291
define <4 x i32 > @vabdlu16 (ptr %A , ptr %B ) nounwind {
190
- ;CHECK-LABEL: vabdlu16:
191
- ;CHECK: vabdl.u16
292
+ ; CHECK-LABEL: vabdlu16:
293
+ ; CHECK: @ %bb.0:
294
+ ; CHECK-NEXT: vldr d16, [r1]
295
+ ; CHECK-NEXT: vldr d17, [r0]
296
+ ; CHECK-NEXT: vabdl.u16 q8, d17, d16
297
+ ; CHECK-NEXT: vmov r0, r1, d16
298
+ ; CHECK-NEXT: vmov r2, r3, d17
299
+ ; CHECK-NEXT: mov pc, lr
192
300
%tmp1 = load <4 x i16 >, ptr %A
193
301
%tmp2 = load <4 x i16 >, ptr %B
194
302
%tmp3 = call <4 x i16 > @llvm.arm.neon.vabdu.v4i16 (<4 x i16 > %tmp1 , <4 x i16 > %tmp2 )
@@ -197,8 +305,14 @@ define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
197
305
}
198
306
199
307
define <2 x i64 > @vabdlu32 (ptr %A , ptr %B ) nounwind {
200
- ;CHECK-LABEL: vabdlu32:
201
- ;CHECK: vabdl.u32
308
+ ; CHECK-LABEL: vabdlu32:
309
+ ; CHECK: @ %bb.0:
310
+ ; CHECK-NEXT: vldr d16, [r1]
311
+ ; CHECK-NEXT: vldr d17, [r0]
312
+ ; CHECK-NEXT: vabdl.u32 q8, d17, d16
313
+ ; CHECK-NEXT: vmov r0, r1, d16
314
+ ; CHECK-NEXT: vmov r2, r3, d17
315
+ ; CHECK-NEXT: mov pc, lr
202
316
%tmp1 = load <2 x i32 >, ptr %A
203
317
%tmp2 = load <2 x i32 >, ptr %B
204
318
%tmp3 = call <2 x i32 > @llvm.arm.neon.vabdu.v2i32 (<2 x i32 > %tmp1 , <2 x i32 > %tmp2 )
0 commit comments