@@ -131,26 +131,107 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
131
131
ret void
132
132
}
133
133
134
- ; FIXME:
135
- ; define half @f16_return(float %arg) #0 {
136
- ; %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
137
- ; ret half %fptrunc
138
- ; }
134
+ define half @f16_return (float %arg ) #0 { ; Truncates the float argument to half under constrained (strict) FP semantics and returns it.
135
+ ; NOFP16-LABEL: f16_return:
136
+ ; NOFP16: // %bb.0:
137
+ ; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
138
+ ; NOFP16-NEXT: .cfi_def_cfa_offset 16
139
+ ; NOFP16-NEXT: .cfi_offset w30, -16
140
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
141
+ ; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
142
+ ; NOFP16-NEXT: ret
143
+ %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32 (float %arg , metadata !"round.tonearest" , metadata !"fpexcept.strict" ) ; Round-to-nearest, strict exception behavior; the checks above expect exactly one call to __gnu_f2h_ieee, with x30 saved and restored around it.
144
+ ret half %fptrunc
145
+ }
139
146
140
- ; define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
141
- ; %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
142
- ; ret <2 x half> %fptrunc
143
- ; }
147
+ define <2 x half > @v2f16_return (<2 x float > %arg ) #0 { ; Truncates a <2 x float> argument to <2 x half> under constrained (strict) FP semantics and returns it.
148
+ ; NOFP16-LABEL: v2f16_return:
149
+ ; NOFP16: // %bb.0:
150
+ ; NOFP16-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
151
+ ; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
152
+ ; NOFP16-NEXT: .cfi_def_cfa_offset 32
153
+ ; NOFP16-NEXT: .cfi_offset w19, -8
154
+ ; NOFP16-NEXT: .cfi_offset w20, -16
155
+ ; NOFP16-NEXT: .cfi_offset w30, -32
156
+ ; NOFP16-NEXT: mov w19, w0
157
+ ; NOFP16-NEXT: mov w0, w1
158
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
159
+ ; NOFP16-NEXT: mov w20, w0
160
+ ; NOFP16-NEXT: mov w0, w19
161
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
162
+ ; NOFP16-NEXT: mov w1, w20
163
+ ; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
164
+ ; NOFP16-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
165
+ ; NOFP16-NEXT: ret
166
+ %fptrunc = call <2 x half > @llvm.experimental.constrained.fptrunc.v2f16.v2f32 (<2 x float > %arg , metadata !"round.tonearest" , metadata !"fpexcept.strict" ) ; Round-to-nearest, strict exception behavior; the checks above expect two __gnu_f2h_ieee calls, first on the value from w1 and then on the value from w0.
167
+ ret <2 x half > %fptrunc ; Per the checks above, the two converted values are left in w0 and w1 before returning.
168
+ }
144
169
145
- ; define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
146
- ; %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
147
- ; ret <3 x half> %fptrunc
148
- ; }
170
+ define <3 x half > @v3f16_return (<3 x float > %arg ) #0 { ; Truncates a <3 x float> argument to <3 x half> under constrained (strict) FP semantics and returns it.
171
+ ; NOFP16-LABEL: v3f16_return:
172
+ ; NOFP16: // %bb.0:
173
+ ; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
174
+ ; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
175
+ ; NOFP16-NEXT: .cfi_def_cfa_offset 32
176
+ ; NOFP16-NEXT: .cfi_offset w19, -8
177
+ ; NOFP16-NEXT: .cfi_offset w20, -16
178
+ ; NOFP16-NEXT: .cfi_offset w21, -24
179
+ ; NOFP16-NEXT: .cfi_offset w30, -32
180
+ ; NOFP16-NEXT: mov w20, w0
181
+ ; NOFP16-NEXT: mov w0, w2
182
+ ; NOFP16-NEXT: mov w19, w1
183
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
184
+ ; NOFP16-NEXT: mov w21, w0
185
+ ; NOFP16-NEXT: mov w0, w19
186
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
187
+ ; NOFP16-NEXT: mov w19, w0
188
+ ; NOFP16-NEXT: mov w0, w20
189
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
190
+ ; NOFP16-NEXT: mov w1, w19
191
+ ; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
192
+ ; NOFP16-NEXT: mov w2, w21
193
+ ; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
194
+ ; NOFP16-NEXT: ret
195
+ %fptrunc = call <3 x half > @llvm.experimental.constrained.fptrunc.v3f16.v3f32 (<3 x float > %arg , metadata !"round.tonearest" , metadata !"fpexcept.strict" ) ; Round-to-nearest, strict exception behavior; the checks above expect three __gnu_f2h_ieee calls, on the values from w2, w1 and w0 in that order.
196
+ ret <3 x half > %fptrunc ; Per the checks above, the three converted values are left in w0, w1 and w2 before returning.
197
+ }
149
198
150
- ; define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
151
- ; %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
152
- ; ret <4 x half> %fptrunc
153
- ; }
199
+ define <4 x half > @v4f16_return (<4 x float > %arg ) #0 { ; Truncates a <4 x float> argument to <4 x half> under constrained (strict) FP semantics and returns it.
200
+ ; NOFP16-LABEL: v4f16_return:
201
+ ; NOFP16: // %bb.0:
202
+ ; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
203
+ ; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
204
+ ; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
205
+ ; NOFP16-NEXT: .cfi_def_cfa_offset 48
206
+ ; NOFP16-NEXT: .cfi_offset w19, -8
207
+ ; NOFP16-NEXT: .cfi_offset w20, -16
208
+ ; NOFP16-NEXT: .cfi_offset w21, -24
209
+ ; NOFP16-NEXT: .cfi_offset w22, -32
210
+ ; NOFP16-NEXT: .cfi_offset w30, -48
211
+ ; NOFP16-NEXT: mov w21, w0
212
+ ; NOFP16-NEXT: mov w0, w3
213
+ ; NOFP16-NEXT: mov w19, w2
214
+ ; NOFP16-NEXT: mov w20, w1
215
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
216
+ ; NOFP16-NEXT: mov w22, w0
217
+ ; NOFP16-NEXT: mov w0, w19
218
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
219
+ ; NOFP16-NEXT: mov w19, w0
220
+ ; NOFP16-NEXT: mov w0, w20
221
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
222
+ ; NOFP16-NEXT: mov w20, w0
223
+ ; NOFP16-NEXT: mov w0, w21
224
+ ; NOFP16-NEXT: bl __gnu_f2h_ieee
225
+ ; NOFP16-NEXT: mov w1, w20
226
+ ; NOFP16-NEXT: mov w2, w19
227
+ ; NOFP16-NEXT: mov w3, w22
228
+ ; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
229
+ ; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
230
+ ; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
231
+ ; NOFP16-NEXT: ret
232
+ %fptrunc = call <4 x half > @llvm.experimental.constrained.fptrunc.v4f16.v4f32 (<4 x float > %arg , metadata !"round.tonearest" , metadata !"fpexcept.strict" ) ; Round-to-nearest, strict exception behavior; the checks above expect four __gnu_f2h_ieee calls, on the values from w3, w2, w1 and w0 in that order, with x19-x22 carrying values across the calls.
233
+ ret <4 x half > %fptrunc ; Per the checks above, the four converted values are left in w0, w1, w2 and w3 before returning.
234
+ }
154
235
155
236
; FIXME:
156
237
; define void @outgoing_f16_arg(ptr %ptr) #0 {
0 commit comments