
Commit 2feb058

AMDGPU: Add baseline test for copysign combine
We can use known bits information to avoid masking out one or both of the operands.
1 parent 097a1d2 commit 2feb058
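
For context, the pattern these tests stage can be sketched in a few lines of IR (an illustration only, not part of this commit; the function names below are made up). After shl i32 %y.i, 31, every bit of the sign operand other than bit 31 is known zero, so the bitcast value is either +0.0 or -0.0 and contributes nothing but a sign bit; similarly, fabs is known to clear the sign bit of the magnitude operand. In either case the masking performed by the usual copysign expansion on that operand is a no-op, which is what a known-bits-aware combine could exploit.

; Illustrative IR only; the real test functions follow in the diff below.
define float @sign_operand_only_bit31(float %x, i32 %y.i) {
  ; Bits 0..30 of %y.even are known zero, so the sign operand is +/-0.0.
  %y.even = shl i32 %y.i, 31
  %y.even.as.f32 = bitcast i32 %y.even to float
  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
  ret float %copysign
}

define float @mag_operand_sign_bit_clear(float %x.arg, i32 %y.i) {
  ; fabs clears bit 31 of the magnitude operand, so masking it with
  ; 0x7fffffff before inserting the new sign changes nothing.
  %x = call float @llvm.fabs.f32(float %x.arg)
  %y.even = shl i32 %y.i, 31
  %y.even.as.f32 = bitcast i32 %y.even to float
  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
  ret float %copysign
}

declare float @llvm.copysign.f32(float, float)
declare float @llvm.fabs.f32(float)

In the GFX9 output recorded below, both shapes still materialize the 0x7fffffff mask (s_brev_b32 s4, -2) and merge the operands with v_bfi_b32; the follow-up combine could presumably drop or simplify that masking once known bits are consulted.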

1 file changed, +222 -0 lines

llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll

Lines changed: 222 additions & 0 deletions
@@ -154,4 +154,226 @@ define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2
   ret <2 x double> %pow_sign1
 }
 
+define float @copysign_f32_f32_sign_known_p0_or_n0(float %x, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define double @copysign_f64_f32_sign_known_p0_or_n0(double %x, i32 %y.i) {
+; GFX9-LABEL: copysign_f64_f32_sign_known_p0_or_n0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v2
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v1, s4, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %y.even.as.f32.fpext = fpext float %y.even.as.f32 to double
+  %copysign = call double @llvm.copysign.f64(double %x, double %y.even.as.f32.fpext)
+  ret double %copysign
+}
+
+define half @copysign_f16_f32_sign_known_p0_or_n0(half %x, i32 %y.i) {
+; GFX9-LABEL: copysign_f16_f32_sign_known_p0_or_n0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; GFX9-NEXT: s_movk_i32 s4, 0x7fff
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %y.even.as.f32.fptrunc = fptrunc float %y.even.as.f32 to half
+  %copysign = call half @llvm.copysign.f16(half %x, half %y.even.as.f32.fptrunc)
+  ret half %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_fabs(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_fabs:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call float @llvm.fabs.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_select(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_select:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x.ule.0 = fcmp ule float %x.arg, 0.0
+  %x = select i1 %x.ule.0, float 0.0, float %x.arg
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_nnan_nsz_sqrt(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_known_positive_nnan_nsz_sqrt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0xf800000
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
+; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
+; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call nnan nsz float @llvm.sqrt.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nsz_sqrt(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nsz_sqrt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0xf800000
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
+; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
+; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call nsz float @llvm.sqrt.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nnan_sqrt(float %x.arg, i32 %y.i) {
+; GFX9-LABEL: copysign_f32_f32_sign_known_p0_or_n0__mag_almost_positive_nnan_sqrt:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0xf800000
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT: v_sqrt_f32_e32 v2, v0
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_add_u32_e32 v3, -1, v2
+; GFX9-NEXT: v_fma_f32 v4, -v3, v2, v0
+; GFX9-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4
+; GFX9-NEXT: v_add_u32_e32 v4, 1, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5]
+; GFX9-NEXT: v_fma_f32 v2, -v4, v2, v0
+; GFX9-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5]
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x37800000, v2
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x260
+; GFX9-NEXT: v_cmp_class_f32_e32 vcc, v0, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %x = call nnan float @llvm.sqrt.f32(float %x.arg)
+  %y.even = shl i32 %y.i, 31
+  %y.even.as.f32 = bitcast i32 %y.even to float
+  %copysign = call float @llvm.copysign.f32(float %x, float %y.even.as.f32)
+  ret float %copysign
+}
+
+define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) {
+; GFX9-LABEL: test_copysign_pow_fast_f32__integral_y:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0x800000
+; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
+; GFX9-NEXT: v_mul_f32_e64 v3, |v0|, v3
+; GFX9-NEXT: v_log_f32_e32 v3, v3
+; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000
+; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
+; GFX9-NEXT: v_sub_f32_e32 v2, v3, v2
+; GFX9-NEXT: v_mul_f32_e32 v3, v2, v1
+; GFX9-NEXT: s_mov_b32 s4, 0xc2fc0000
+; GFX9-NEXT: v_mov_b32_e32 v4, 0x42800000
+; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v3
+; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc
+; GFX9-NEXT: v_fma_f32 v2, v2, v1, v3
+; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX9-NEXT: v_exp_f32_e32 v2, v2
+; GFX9-NEXT: v_mov_b32_e32 v3, 0x1f800000
+; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1
+; GFX9-NEXT: v_mul_f32_e32 v2, v2, v3
+; GFX9-NEXT: v_and_b32_e32 v0, v1, v0
+; GFX9-NEXT: s_brev_b32 s4, -2
+; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+  %y = sitofp i32 %y.i to float
+  %y.fptosi = fptosi float %y to i32
+  %fabs = call fast float @llvm.fabs.f32(float %x)
+  %log2 = call fast float @llvm.log2.f32(float %fabs)
+  %pownI2F = sitofp i32 %y.i to float
+  %ylogx = fmul fast float %log2, %pownI2F
+  %exp2 = call fast float @llvm.exp2.f32(float %ylogx)
+  %yeven = shl i32 %y.fptosi, 31
+  %x.i32 = bitcast float %x to i32
+  %pow_sign = and i32 %yeven, %x.i32
+  %pow_sign.f32 = bitcast i32 %pow_sign to float
+  %pow_sign1 = call fast float @llvm.copysign.f32(float %exp2, float %pow_sign.f32)
+  ret float %pow_sign1
+}
+
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
