Skip to content

Commit 8eee6d3

Browse files
authored
DAG: Call SimplifyDemandedBits on copysign value operand (#97180)
So far, the only cases that seem to benefit are the unusual copysign operations whose magnitude and sign operands have different types.
1 parent a632364 commit 8eee6d3

File tree

2 files changed

+20
-25
lines changed

2 files changed

+20
-25
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17571,6 +17571,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
1757117571
APInt::getSignMask(SignVT.getScalarSizeInBits())))
1757217572
return SDValue(N, 0);
1757317573

17574+
// We only take the non-sign bits from the value operand
17575+
if (SimplifyDemandedBits(N0,
17576+
APInt::getSignedMaxValue(VT.getScalarSizeInBits())))
17577+
return SDValue(N, 0);
17578+
1757417579
return SDValue();
1757517580
}
1757617581

llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1639,10 +1639,7 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
16391639
; VI-NEXT: v_or_b32_e32 v2, 0x7c00, v2
16401640
; VI-NEXT: v_mov_b32_e32 v3, s2
16411641
; VI-NEXT: s_cselect_b64 vcc, -1, 0
1642-
; VI-NEXT: s_lshr_b32 s0, s7, 16
16431642
; VI-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
1644-
; VI-NEXT: s_and_b32 s0, s0, 0x8000
1645-
; VI-NEXT: v_or_b32_e32 v2, s0, v2
16461643
; VI-NEXT: s_movk_i32 s0, 0x7fff
16471644
; VI-NEXT: v_mov_b32_e32 v3, s8
16481645
; VI-NEXT: v_bfi_b32 v2, s0, v2, v3
@@ -1673,36 +1670,33 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
16731670
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s2
16741671
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, s0, v1
16751672
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1676-
; GFX9-NEXT: s_add_i32 s9, s1, 0xfffffc10
1673+
; GFX9-NEXT: s_add_i32 s7, s1, 0xfffffc10
16771674
; GFX9-NEXT: v_readfirstlane_b32 s0, v1
1678-
; GFX9-NEXT: s_lshl_b32 s1, s9, 12
1675+
; GFX9-NEXT: s_lshl_b32 s1, s7, 12
16791676
; GFX9-NEXT: s_or_b32 s0, s2, s0
16801677
; GFX9-NEXT: s_or_b32 s1, s6, s1
1681-
; GFX9-NEXT: s_cmp_lt_i32 s9, 1
1682-
; GFX9-NEXT: s_cselect_b32 s10, s0, s1
1683-
; GFX9-NEXT: s_and_b32 s2, s10, 7
1678+
; GFX9-NEXT: s_cmp_lt_i32 s7, 1
1679+
; GFX9-NEXT: s_cselect_b32 s9, s0, s1
1680+
; GFX9-NEXT: s_and_b32 s2, s9, 7
16841681
; GFX9-NEXT: s_cmp_gt_i32 s2, 5
16851682
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
16861683
; GFX9-NEXT: s_cmp_eq_u32 s2, 3
16871684
; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
16881685
; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1689-
; GFX9-NEXT: s_lshr_b32 s2, s10, 2
1686+
; GFX9-NEXT: s_lshr_b32 s2, s9, 2
16901687
; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
16911688
; GFX9-NEXT: s_addc_u32 s0, s2, 0
1692-
; GFX9-NEXT: s_cmp_lt_i32 s9, 31
1689+
; GFX9-NEXT: s_cmp_lt_i32 s7, 31
16931690
; GFX9-NEXT: s_cselect_b32 s2, s0, 0x7c00
16941691
; GFX9-NEXT: s_cmp_lg_u32 s6, 0
16951692
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
16961693
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
16971694
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 9, v1
1698-
; GFX9-NEXT: s_cmpk_eq_i32 s9, 0x40f
1695+
; GFX9-NEXT: s_cmpk_eq_i32 s7, 0x40f
16991696
; GFX9-NEXT: v_or_b32_e32 v1, 0x7c00, v1
17001697
; GFX9-NEXT: v_mov_b32_e32 v2, s2
17011698
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
1702-
; GFX9-NEXT: s_lshr_b32 s0, s7, 16
17031699
; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
1704-
; GFX9-NEXT: s_and_b32 s0, s0, 0x8000
1705-
; GFX9-NEXT: v_or_b32_e32 v1, s0, v1
17061700
; GFX9-NEXT: s_movk_i32 s0, 0x7fff
17071701
; GFX9-NEXT: v_mov_b32_e32 v2, s8
17081702
; GFX9-NEXT: v_bfi_b32 v1, s0, v1, v2
@@ -1728,13 +1722,13 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
17281722
; GFX11-NEXT: s_addk_i32 s1, 0xfc10
17291723
; GFX11-NEXT: v_med3_i32 v1, s3, 0, 13
17301724
; GFX11-NEXT: v_readfirstlane_b32 s3, v0
1731-
; GFX11-NEXT: s_lshl_b32 s8, s1, 12
1725+
; GFX11-NEXT: s_lshl_b32 s7, s1, 12
17321726
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
17331727
; GFX11-NEXT: v_readfirstlane_b32 s6, v1
17341728
; GFX11-NEXT: s_or_b32 s2, s2, s3
17351729
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
17361730
; GFX11-NEXT: s_or_b32 s3, s2, 0x1000
1737-
; GFX11-NEXT: s_or_b32 s8, s2, s8
1731+
; GFX11-NEXT: s_or_b32 s7, s2, s7
17381732
; GFX11-NEXT: s_lshr_b32 s6, s3, s6
17391733
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
17401734
; GFX11-NEXT: v_lshlrev_b32_e64 v0, v1, s6
@@ -1745,15 +1739,15 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
17451739
; GFX11-NEXT: v_readfirstlane_b32 s3, v0
17461740
; GFX11-NEXT: s_or_b32 s3, s6, s3
17471741
; GFX11-NEXT: s_cmp_lt_i32 s1, 1
1748-
; GFX11-NEXT: s_cselect_b32 s3, s3, s8
1742+
; GFX11-NEXT: s_cselect_b32 s3, s3, s7
17491743
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
17501744
; GFX11-NEXT: s_and_b32 s6, s3, 7
17511745
; GFX11-NEXT: s_cmp_gt_i32 s6, 5
1752-
; GFX11-NEXT: s_cselect_b32 s8, -1, 0
1746+
; GFX11-NEXT: s_cselect_b32 s7, -1, 0
17531747
; GFX11-NEXT: s_cmp_eq_u32 s6, 3
17541748
; GFX11-NEXT: s_cselect_b32 s6, -1, 0
17551749
; GFX11-NEXT: s_lshr_b32 s3, s3, 2
1756-
; GFX11-NEXT: s_or_b32 s6, s6, s8
1750+
; GFX11-NEXT: s_or_b32 s6, s6, s7
17571751
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
17581752
; GFX11-NEXT: s_cmp_lg_u32 s6, 0
17591753
; GFX11-NEXT: s_addc_u32 s3, s3, 0
@@ -1764,15 +1758,11 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
17641758
; GFX11-NEXT: s_cmpk_eq_i32 s1, 0x40f
17651759
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
17661760
; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1767-
; GFX11-NEXT: s_lshr_b32 s1, s7, 16
1768-
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1769-
; GFX11-NEXT: s_and_b32 s1, s1, 0x8000
1770-
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 9, v0
17711761
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1762+
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 9, v0
17721763
; GFX11-NEXT: v_or_b32_e32 v0, 0x7c00, v0
1773-
; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
17741764
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1775-
; GFX11-NEXT: v_or_b32_e32 v0, s1, v0
1765+
; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
17761766
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, s0
17771767
; GFX11-NEXT: global_store_b16 v1, v0, s[4:5]
17781768
; GFX11-NEXT: s_nop 0

0 commit comments

Comments
 (0)