@@ -28,14 +28,12 @@ entry:
28
28
define <16 x i32 > @mul_i32 (<16 x i8 > %a , <16 x i8 > %b ) {
29
29
; CHECK-SD-LABEL: mul_i32:
30
30
; CHECK-SD: // %bb.0: // %entry
31
- ; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0
32
- ; CHECK-SD-NEXT: ushll v4.8h, v1.8b, #0
33
- ; CHECK-SD-NEXT: ushll2 v5.8h, v0.16b, #0
34
- ; CHECK-SD-NEXT: ushll2 v6.8h, v1.16b, #0
35
- ; CHECK-SD-NEXT: umull v0.4s, v2.4h, v4.4h
36
- ; CHECK-SD-NEXT: umull2 v1.4s, v2.8h, v4.8h
37
- ; CHECK-SD-NEXT: umull2 v3.4s, v5.8h, v6.8h
38
- ; CHECK-SD-NEXT: umull v2.4s, v5.4h, v6.4h
31
+ ; CHECK-SD-NEXT: umull v2.8h, v0.8b, v1.8b
32
+ ; CHECK-SD-NEXT: umull2 v4.8h, v0.16b, v1.16b
33
+ ; CHECK-SD-NEXT: ushll v0.4s, v2.4h, #0
34
+ ; CHECK-SD-NEXT: ushll2 v3.4s, v4.8h, #0
35
+ ; CHECK-SD-NEXT: ushll2 v1.4s, v2.8h, #0
36
+ ; CHECK-SD-NEXT: ushll v2.4s, v4.4h, #0
39
37
; CHECK-SD-NEXT: ret
40
38
;
41
39
; CHECK-GI-LABEL: mul_i32:
@@ -59,26 +57,20 @@ entry:
59
57
define <16 x i64 > @mul_i64 (<16 x i8 > %a , <16 x i8 > %b ) {
60
58
; CHECK-SD-LABEL: mul_i64:
61
59
; CHECK-SD: // %bb.0: // %entry
62
- ; CHECK-SD-NEXT: ushll v2.8h, v0.8b, #0
63
- ; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
64
- ; CHECK-SD-NEXT: ushll v3.8h, v1.8b, #0
65
- ; CHECK-SD-NEXT: ushll2 v1.8h, v1.16b, #0
66
- ; CHECK-SD-NEXT: ushll v4.4s, v2.4h, #0
67
- ; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0
68
- ; CHECK-SD-NEXT: ushll v6.4s, v3.4h, #0
60
+ ; CHECK-SD-NEXT: umull v2.8h, v0.8b, v1.8b
61
+ ; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
62
+ ; CHECK-SD-NEXT: ushll v3.4s, v2.4h, #0
69
63
; CHECK-SD-NEXT: ushll2 v2.4s, v2.8h, #0
70
- ; CHECK-SD-NEXT: ushll v16.4s, v1.4h, #0
71
- ; CHECK-SD-NEXT: ushll2 v7.4s, v3.8h, #0
72
- ; CHECK-SD-NEXT: ushll2 v17.4s, v0.8h, #0
73
- ; CHECK-SD-NEXT: ushll2 v18.4s, v1.8h, #0
74
- ; CHECK-SD-NEXT: umull2 v1.2d, v4.4s, v6.4s
75
- ; CHECK-SD-NEXT: umull v0.2d, v4.2s, v6.2s
76
- ; CHECK-SD-NEXT: umull2 v3.2d, v2.4s, v7.4s
77
- ; CHECK-SD-NEXT: umull v2.2d, v2.2s, v7.2s
78
- ; CHECK-SD-NEXT: umull v4.2d, v5.2s, v16.2s
79
- ; CHECK-SD-NEXT: umull2 v7.2d, v17.4s, v18.4s
80
- ; CHECK-SD-NEXT: umull2 v5.2d, v5.4s, v16.4s
81
- ; CHECK-SD-NEXT: umull v6.2d, v17.2s, v18.2s
64
+ ; CHECK-SD-NEXT: ushll v5.4s, v0.4h, #0
65
+ ; CHECK-SD-NEXT: ushll2 v6.4s, v0.8h, #0
66
+ ; CHECK-SD-NEXT: ushll2 v1.2d, v3.4s, #0
67
+ ; CHECK-SD-NEXT: ushll v0.2d, v3.2s, #0
68
+ ; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
69
+ ; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
70
+ ; CHECK-SD-NEXT: ushll v4.2d, v5.2s, #0
71
+ ; CHECK-SD-NEXT: ushll2 v7.2d, v6.4s, #0
72
+ ; CHECK-SD-NEXT: ushll2 v5.2d, v5.4s, #0
73
+ ; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
82
74
; CHECK-SD-NEXT: ret
83
75
;
84
76
; CHECK-GI-LABEL: mul_i64:
@@ -139,17 +131,12 @@ entry:
139
131
define <16 x i32 > @mla_i32 (<16 x i8 > %a , <16 x i8 > %b , <16 x i32 > %c ) {
140
132
; CHECK-SD-LABEL: mla_i32:
141
133
; CHECK-SD: // %bb.0: // %entry
142
- ; CHECK-SD-NEXT: ushll v6.8h, v0.8b, #0
143
- ; CHECK-SD-NEXT: ushll v7.8h, v1.8b, #0
144
- ; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
145
- ; CHECK-SD-NEXT: ushll2 v1.8h, v1.16b, #0
146
- ; CHECK-SD-NEXT: umlal v2.4s, v6.4h, v7.4h
147
- ; CHECK-SD-NEXT: umlal2 v3.4s, v6.8h, v7.8h
148
- ; CHECK-SD-NEXT: umlal2 v5.4s, v0.8h, v1.8h
149
- ; CHECK-SD-NEXT: umlal v4.4s, v0.4h, v1.4h
150
- ; CHECK-SD-NEXT: mov v0.16b, v2.16b
151
- ; CHECK-SD-NEXT: mov v1.16b, v3.16b
152
- ; CHECK-SD-NEXT: mov v2.16b, v4.16b
134
+ ; CHECK-SD-NEXT: umull2 v7.8h, v0.16b, v1.16b
135
+ ; CHECK-SD-NEXT: umull v6.8h, v0.8b, v1.8b
136
+ ; CHECK-SD-NEXT: uaddw2 v5.4s, v5.4s, v7.8h
137
+ ; CHECK-SD-NEXT: uaddw v0.4s, v2.4s, v6.4h
138
+ ; CHECK-SD-NEXT: uaddw2 v1.4s, v3.4s, v6.8h
139
+ ; CHECK-SD-NEXT: uaddw v2.4s, v4.4s, v7.4h
153
140
; CHECK-SD-NEXT: mov v3.16b, v5.16b
154
141
; CHECK-SD-NEXT: ret
155
142
;
@@ -179,35 +166,22 @@ entry:
179
166
define <16 x i64 > @mla_i64 (<16 x i8 > %a , <16 x i8 > %b , <16 x i64 > %c ) {
180
167
; CHECK-SD-LABEL: mla_i64:
181
168
; CHECK-SD: // %bb.0: // %entry
182
- ; CHECK-SD-NEXT: mov v17.16b, v7.16b
183
- ; CHECK-SD-NEXT: mov v16.16b, v6.16b
184
- ; CHECK-SD-NEXT: ushll v6.8h, v0.8b, #0
185
- ; CHECK-SD-NEXT: ushll2 v0.8h, v0.16b, #0
186
- ; CHECK-SD-NEXT: ushll v7.8h, v1.8b, #0
187
- ; CHECK-SD-NEXT: ushll2 v1.8h, v1.16b, #0
188
- ; CHECK-SD-NEXT: ushll v18.4s, v6.4h, #0
189
- ; CHECK-SD-NEXT: ushll2 v21.4s, v6.8h, #0
190
- ; CHECK-SD-NEXT: ushll v19.4s, v0.4h, #0
191
- ; CHECK-SD-NEXT: ushll v20.4s, v7.4h, #0
192
- ; CHECK-SD-NEXT: ushll v22.4s, v1.4h, #0
193
- ; CHECK-SD-NEXT: ushll2 v23.4s, v7.8h, #0
194
- ; CHECK-SD-NEXT: ldp q6, q7, [sp]
195
- ; CHECK-SD-NEXT: ushll2 v0.4s, v0.8h, #0
196
- ; CHECK-SD-NEXT: ushll2 v1.4s, v1.8h, #0
197
- ; CHECK-SD-NEXT: umlal2 v3.2d, v18.4s, v20.4s
198
- ; CHECK-SD-NEXT: umlal v2.2d, v18.2s, v20.2s
199
- ; CHECK-SD-NEXT: umlal v16.2d, v19.2s, v22.2s
200
- ; CHECK-SD-NEXT: umlal2 v5.2d, v21.4s, v23.4s
201
- ; CHECK-SD-NEXT: umlal v4.2d, v21.2s, v23.2s
202
- ; CHECK-SD-NEXT: umlal2 v17.2d, v19.4s, v22.4s
203
- ; CHECK-SD-NEXT: umlal2 v7.2d, v0.4s, v1.4s
204
- ; CHECK-SD-NEXT: umlal v6.2d, v0.2s, v1.2s
205
- ; CHECK-SD-NEXT: mov v0.16b, v2.16b
206
- ; CHECK-SD-NEXT: mov v1.16b, v3.16b
207
- ; CHECK-SD-NEXT: mov v2.16b, v4.16b
208
- ; CHECK-SD-NEXT: mov v3.16b, v5.16b
209
- ; CHECK-SD-NEXT: mov v4.16b, v16.16b
210
- ; CHECK-SD-NEXT: mov v5.16b, v17.16b
169
+ ; CHECK-SD-NEXT: umull v16.8h, v0.8b, v1.8b
170
+ ; CHECK-SD-NEXT: umull2 v0.8h, v0.16b, v1.16b
171
+ ; CHECK-SD-NEXT: ldp q20, q21, [sp]
172
+ ; CHECK-SD-NEXT: ushll v17.4s, v16.4h, #0
173
+ ; CHECK-SD-NEXT: ushll2 v16.4s, v16.8h, #0
174
+ ; CHECK-SD-NEXT: ushll2 v19.4s, v0.8h, #0
175
+ ; CHECK-SD-NEXT: ushll v18.4s, v0.4h, #0
176
+ ; CHECK-SD-NEXT: uaddw2 v1.2d, v3.2d, v17.4s
177
+ ; CHECK-SD-NEXT: uaddw v0.2d, v2.2d, v17.2s
178
+ ; CHECK-SD-NEXT: uaddw2 v3.2d, v5.2d, v16.4s
179
+ ; CHECK-SD-NEXT: uaddw v2.2d, v4.2d, v16.2s
180
+ ; CHECK-SD-NEXT: uaddw2 v16.2d, v21.2d, v19.4s
181
+ ; CHECK-SD-NEXT: uaddw v4.2d, v6.2d, v18.2s
182
+ ; CHECK-SD-NEXT: uaddw2 v5.2d, v7.2d, v18.4s
183
+ ; CHECK-SD-NEXT: uaddw v6.2d, v20.2d, v19.2s
184
+ ; CHECK-SD-NEXT: mov v7.16b, v16.16b
211
185
; CHECK-SD-NEXT: ret
212
186
;
213
187
; CHECK-GI-LABEL: mla_i64:
0 commit comments