@@ -154,4 +154,226 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2
  ret <2 x double> %pow_sign1
}

+define float @copysign_f32_f32_sign_known_p0_or_n0(float %x, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define double @copysign_f64_f32_sign_known_p0_or_n0(double %x, i32 %y.i) {
+; GFX9-LABEL: copysign_f64_f32_sign_known_p0_or_n0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v2
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v1, s4, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %y.even.as.f32.fpext = fpext float %y.even.as.f32 to double
+  %copysign = call double @llvm.copysign.f64(double %x, double %y.even.as.f32.fpext)
+  ret double %copysign
+}
+
+define half @copysign_f16_f32_sign_known_p0_or_n0(half %x, i32 %y.i) {
+; GFX9-LABEL: copysign_f16_f32_sign_known_p0_or_n0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %y.even.as.f32.fptrunc = fptrunc float %y.even.as.f32 to half
+  %copysign = call half @llvm.copysign.f16(half %x, half %y.even.as.f32.fptrunc)
+  ret half %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_fabs(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_fabs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call float @llvm.fabs.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_select(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_select:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x.ule.0 = fcmp ule float %x.arg, 0.0
+  %x = select i1 %x.ule.0, float 0.0, float %x.arg
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_nnan_nsz_sqrt(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_nnan_nsz_sqrt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0xf800000
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
+; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
+; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call nnan nsz float @llvm.sqrt.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nsz_sqrt(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nsz_sqrt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0xf800000
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
+; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
+; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call nsz float @llvm.sqrt.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nnan_sqrt(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nnan_sqrt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0xf800000
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
+; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
+; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call nnan float @llvm.sqrt.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) {
+; GFX9-LABEL: test_copysign_pow_fast_f32__integral_y:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0x800000
+; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
+; GFX9-NEXT: v_mul_f32_e64 v3, |v0|, v3
+; GFX9-NEXT: v_log_f32_e32 v3, v3
+; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
+; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_sub_f32_e32 v2, v3, v2
+; GFX9-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x42800000
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; GFX9-NEXT: v_fma_f32 v2, v2, v1, v3
+; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_mul_f32_e32 v2, v2, v3
+; GFX9-NEXT: v_and_b32_e32 v0, v1, v0
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y = sitofp i32 %y.i to float
+  %y.fptosi = fptosi float %y to i32
+  %fabs = call fast float @llvm.fabs.f32(float %x)
+  %log2 = call fast float @llvm.log2.f32(float %fabs)
+  %pownI2F = sitofp i32 %y.i to float
+  %ylogx = fmul fast float %log2, %pownI2F
+  %exp2 = call fast float @llvm.exp2.f32(float %ylogx)
+  %yeven = shl i32 %y.fptosi, 31
+  %x.i32 = bitcast float %x to i32
+  %pow_sign = and i32 %yeven, %x.i32
+  %pow_sign.f32 = bitcast i32 %pow_sign to float
+  %pow_sign1 = call fast float @llvm.copysign.f32(float %exp2, float %pow_sign.f32)
+  ret float %pow_sign1
+}
+
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }