Skip to content

Commit f808c8a

Browse files
committed
AMDGPU: Add baseline test for copysign combine
Pre-commit tests showing we try to SimplifyDemandedBits on the sign operand.
1 parent 03d9a31 commit f808c8a

File tree

1 file changed

+165
-0
lines changed

1 file changed

+165
-0
lines changed
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3+
4+
; Test that we use SimplifyDemandedBits on copysign's sign
5+
; operand. These are somewhat simplified extractions from fast pown
6+
; expansions.
7+
8+
; Scalar f16 case. The exponent is forced odd by or-ing in bit 0, the
; magnitude is fabs(x) * sitofp(y), and the value passed as copysign's sign
; operand is x with every bit except the sign bit (0x8000) cleared by an
; explicit 'and'. The expected output below still contains that mask
; (v_and_b32) feeding v_bfi_b32.
define half @test_pown_reduced_fast_f16_known_odd(half %x, i32 %y.arg) #0 {
; GFX9-LABEL: test_pown_reduced_fast_f16_known_odd:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX9-NEXT: v_and_b32_e32 v2, 0xffff8000, v0
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: v_mul_f16_e64 v0, |v0|, v1
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
  ; Magnitude: |x| * (half)(y | 1).
  %y.odd = or i32 %y.arg, 1
  %x.abs = call half @llvm.fabs.f16(half %x)
  %y.fp = sitofp i32 %y.odd to half
  %mag = fmul half %x.abs, %y.fp
  ; Sign source: only the sign bit of x survives the mask.
  %x.bits = bitcast half %x to i16
  %sign.bits = and i16 %x.bits, -32768
  %sign.fp = bitcast i16 %sign.bits to half
  %result = call half @llvm.copysign.f16(half %mag, half %sign.fp)
  ret half %result
}
30+
31+
; <2 x half> case. Same pattern as the scalar form, applied per lane: both
; exponent lanes are or-ed with 1, the magnitude is fabs(x) * sitofp(y), and
; the copysign sign operand is x with each 16-bit lane masked down to its
; sign bit (i16 -32768).
define <2 x half> @test_pown_reduced_fast_v2f16_known_odd(<2 x half> %x, <2 x i32> %y.arg) #0 {
; GFX9-LABEL: test_pown_reduced_fast_v2f16_known_odd:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
; GFX9-NEXT: v_or_b32_e32 v2, 1, v2
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX9-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT: s_movk_i32 s4, 0x7fff
; GFX9-NEXT: v_pack_b32_f16 v1, v1, v2
; GFX9-NEXT: v_pk_mul_f16 v1, v3, v1
; GFX9-NEXT: v_bfi_b32 v2, s4, v1, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: v_bfi_b32 v0, s4, v1, v0
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v0, v2, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
  ; Magnitude: |x| * (<2 x half>)(y | 1), lane-wise.
  %y.odd = or <2 x i32> %y.arg, <i32 1, i32 1>
  %x.abs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
  %y.fp = sitofp <2 x i32> %y.odd to <2 x half>
  %mag = fmul <2 x half> %x.abs, %y.fp
  ; Sign source: only the per-lane sign bits of x survive the mask.
  %x.bits = bitcast <2 x half> %x to <2 x i16>
  %sign.bits = and <2 x i16> %x.bits, <i16 -32768, i16 -32768>
  %sign.fp = bitcast <2 x i16> %sign.bits to <2 x half>
  %result = call <2 x half> @llvm.copysign.v2f16(<2 x half> %mag, <2 x half> %sign.fp)
  ret <2 x half> %result
}
63+
64+
; Scalar f32 case. The exponent is forced odd by or-ing in bit 0, the
; magnitude is fabs(x) * sitofp(y), and the copysign sign operand is x with
; everything except the sign bit (i32 -2147483648, i.e. 0x80000000) masked
; off by an explicit 'and'.
define float @test_pown_reduced_fast_f32_known_odd(float %x, i32 %y.arg) #0 {
; GFX9-LABEL: test_pown_reduced_fast_f32_known_odd:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v1, 1, v1
; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX9-NEXT: v_and_b32_e32 v2, 0x80000000, v0
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1
; GFX9-NEXT: v_bfi_b32 v0, s4, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
  ; Magnitude: |x| * (float)(y | 1).
  %y.odd = or i32 %y.arg, 1
  %x.abs = call float @llvm.fabs.f32(float %x)
  %y.fp = sitofp i32 %y.odd to float
  %mag = fmul float %x.abs, %y.fp
  ; Sign source: only the sign bit of x survives the mask.
  %x.bits = bitcast float %x to i32
  %sign.bits = and i32 %x.bits, -2147483648
  %sign.fp = bitcast i32 %sign.bits to float
  %result = call float @llvm.copysign.f32(float %mag, float %sign.fp)
  ret float %result
}
85+
86+
; <2 x float> case. Lane-wise version of the f32 pattern: both exponent lanes
; are or-ed with 1, the magnitude is fabs(x) * sitofp(y), and the copysign
; sign operand is x with each 32-bit lane masked down to its sign bit.
define <2 x float> @test_pown_reduced_fast_v2f32_known_odd(<2 x float> %x, <2 x i32> %y.arg) #0 {
; GFX9-LABEL: test_pown_reduced_fast_v2f32_known_odd:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v3, 1, v3
; GFX9-NEXT: v_or_b32_e32 v2, 1, v2
; GFX9-NEXT: v_cvt_f32_i32_e32 v3, v3
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f32_e64 v3, |v1|, v3
; GFX9-NEXT: v_mul_f32_e64 v2, |v0|, v2
; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v0
; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0
; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
  ; Magnitude: |x| * (<2 x float>)(y | 1), lane-wise.
  %y.odd = or <2 x i32> %y.arg, <i32 1, i32 1>
  %x.abs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
  %y.fp = sitofp <2 x i32> %y.odd to <2 x float>
  %mag = fmul <2 x float> %x.abs, %y.fp
  ; Sign source: only the per-lane sign bits of x survive the mask.
  %x.bits = bitcast <2 x float> %x to <2 x i32>
  %sign.bits = and <2 x i32> %x.bits, <i32 -2147483648, i32 -2147483648>
  %sign.fp = bitcast <2 x i32> %sign.bits to <2 x float>
  %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign.fp)
  ret <2 x float> %result
}
112+
113+
; Scalar f64 case. The exponent is forced odd by or-ing in bit 0, the
; magnitude is fabs(x) * sitofp(y), and the copysign sign operand is x with
; everything except the i64 sign bit (-9223372036854775808) masked off. In
; the expected output the mask shows up as a 0x80000000 'and' on the high
; dword (v1) only.
define double @test_pown_reduced_fast_f64_known_odd(double %x, i32 %y.arg) #0 {
; GFX9-LABEL: test_pown_reduced_fast_f64_known_odd:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v2, 1, v2
; GFX9-NEXT: v_cvt_f64_i32_e32 v[2:3], v2
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f64 v[2:3], |v[0:1]|, v[2:3]
; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v1
; GFX9-NEXT: v_bfi_b32 v1, s4, v3, v0
; GFX9-NEXT: v_mov_b32_e32 v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
  ; Magnitude: |x| * (double)(y | 1).
  %y.odd = or i32 %y.arg, 1
  %x.abs = call double @llvm.fabs.f64(double %x)
  %y.fp = sitofp i32 %y.odd to double
  %mag = fmul double %x.abs, %y.fp
  ; Sign source: only the sign bit of x survives the mask.
  %x.bits = bitcast double %x to i64
  %sign.bits = and i64 %x.bits, -9223372036854775808
  %sign.fp = bitcast i64 %sign.bits to double
  %result = call double @llvm.copysign.f64(double %mag, double %sign.fp)
  ret double %result
}
135+
136+
; <2 x double> case. Lane-wise version of the f64 pattern: both exponent
; lanes are or-ed with 1, the magnitude is fabs(x) * sitofp(y), and the
; copysign sign operand is x with each 64-bit lane masked down to its sign
; bit (i64 -9223372036854775808).
define <2 x double> @test_pown_reduced_fast_v2f64_known_odd(<2 x double> %x, <2 x i32> %y.arg) #0 {
; GFX9-LABEL: test_pown_reduced_fast_v2f64_known_odd:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_or_b32_e32 v6, 1, v5
; GFX9-NEXT: v_or_b32_e32 v4, 1, v4
; GFX9-NEXT: v_cvt_f64_i32_e32 v[4:5], v4
; GFX9-NEXT: v_cvt_f64_i32_e32 v[6:7], v6
; GFX9-NEXT: s_brev_b32 s4, -2
; GFX9-NEXT: v_mul_f64 v[4:5], |v[0:1]|, v[4:5]
; GFX9-NEXT: v_mul_f64 v[6:7], |v[2:3]|, v[6:7]
; GFX9-NEXT: v_and_b32_e32 v0, 0x80000000, v3
; GFX9-NEXT: v_and_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: v_bfi_b32 v1, s4, v5, v1
; GFX9-NEXT: v_bfi_b32 v3, s4, v7, v0
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v2, v6
; GFX9-NEXT: s_setpc_b64 s[30:31]
  ; Magnitude: |x| * (<2 x double>)(y | 1), lane-wise.
  %y = or <2 x i32> %y.arg, <i32 1, i32 1>
  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %x)
  %pownI2F = sitofp <2 x i32> %y to <2 x double>
  %ylogx = fmul <2 x double> %fabs, %pownI2F
  ; Sign source: only the per-lane sign bits of x survive the mask.
  %cast_x = bitcast <2 x double> %x to <2 x i64>
  %pow_sign = and <2 x i64> %cast_x, <i64 -9223372036854775808, i64 -9223372036854775808>
  %cast_sign = bitcast <2 x i64> %pow_sign to <2 x double>
  ; The overload suffix must name the actual operand type, so the vector
  ; form is llvm.copysign.v2f64 rather than the scalar llvm.copysign.f64.
  %pow_sign1 = call <2 x double> @llvm.copysign.v2f64(<2 x double> %ylogx, <2 x double> %cast_sign)
  ret <2 x double> %pow_sign1
}
164+
165+
; Attribute group #0, attached to each of the test function definitions above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

0 commit comments

Comments
 (0)