@@ -1639,10 +1639,7 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
1639
1639
; VI-NEXT: v_or_b32_e32 v2, 0x7c00, v2
1640
1640
; VI-NEXT: v_mov_b32_e32 v3, s2
1641
1641
; VI-NEXT: s_cselect_b64 vcc, -1, 0
1642
- ; VI-NEXT: s_lshr_b32 s0, s7, 16
1643
1642
; VI-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
1644
- ; VI-NEXT: s_and_b32 s0, s0, 0x8000
1645
- ; VI-NEXT: v_or_b32_e32 v2, s0, v2
1646
1643
; VI-NEXT: s_movk_i32 s0, 0x7fff
1647
1644
; VI-NEXT: v_mov_b32_e32 v3, s8
1648
1645
; VI-NEXT: v_bfi_b32 v2, s0, v2, v3
@@ -1673,36 +1670,33 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
1673
1670
; GFX9-NEXT: v_lshlrev_b32_e64 v1, v1, s2
1674
1671
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, s0, v1
1675
1672
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1676
- ; GFX9-NEXT: s_add_i32 s9 , s1, 0xfffffc10
1673
+ ; GFX9-NEXT: s_add_i32 s7 , s1, 0xfffffc10
1677
1674
; GFX9-NEXT: v_readfirstlane_b32 s0, v1
1678
- ; GFX9-NEXT: s_lshl_b32 s1, s9 , 12
1675
+ ; GFX9-NEXT: s_lshl_b32 s1, s7 , 12
1679
1676
; GFX9-NEXT: s_or_b32 s0, s2, s0
1680
1677
; GFX9-NEXT: s_or_b32 s1, s6, s1
1681
- ; GFX9-NEXT: s_cmp_lt_i32 s9 , 1
1682
- ; GFX9-NEXT: s_cselect_b32 s10 , s0, s1
1683
- ; GFX9-NEXT: s_and_b32 s2, s10 , 7
1678
+ ; GFX9-NEXT: s_cmp_lt_i32 s7 , 1
1679
+ ; GFX9-NEXT: s_cselect_b32 s9 , s0, s1
1680
+ ; GFX9-NEXT: s_and_b32 s2, s9 , 7
1684
1681
; GFX9-NEXT: s_cmp_gt_i32 s2, 5
1685
1682
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
1686
1683
; GFX9-NEXT: s_cmp_eq_u32 s2, 3
1687
1684
; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
1688
1685
; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1689
- ; GFX9-NEXT: s_lshr_b32 s2, s10 , 2
1686
+ ; GFX9-NEXT: s_lshr_b32 s2, s9 , 2
1690
1687
; GFX9-NEXT: s_cmp_lg_u64 s[0:1], 0
1691
1688
; GFX9-NEXT: s_addc_u32 s0, s2, 0
1692
- ; GFX9-NEXT: s_cmp_lt_i32 s9 , 31
1689
+ ; GFX9-NEXT: s_cmp_lt_i32 s7 , 31
1693
1690
; GFX9-NEXT: s_cselect_b32 s2, s0, 0x7c00
1694
1691
; GFX9-NEXT: s_cmp_lg_u32 s6, 0
1695
1692
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
1696
1693
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
1697
1694
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 9, v1
1698
- ; GFX9-NEXT: s_cmpk_eq_i32 s9 , 0x40f
1695
+ ; GFX9-NEXT: s_cmpk_eq_i32 s7 , 0x40f
1699
1696
; GFX9-NEXT: v_or_b32_e32 v1, 0x7c00, v1
1700
1697
; GFX9-NEXT: v_mov_b32_e32 v2, s2
1701
1698
; GFX9-NEXT: s_cselect_b64 vcc, -1, 0
1702
- ; GFX9-NEXT: s_lshr_b32 s0, s7, 16
1703
1699
; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
1704
- ; GFX9-NEXT: s_and_b32 s0, s0, 0x8000
1705
- ; GFX9-NEXT: v_or_b32_e32 v1, s0, v1
1706
1700
; GFX9-NEXT: s_movk_i32 s0, 0x7fff
1707
1701
; GFX9-NEXT: v_mov_b32_e32 v2, s8
1708
1702
; GFX9-NEXT: v_bfi_b32 v1, s0, v1, v2
@@ -1728,13 +1722,13 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
1728
1722
; GFX11-NEXT: s_addk_i32 s1, 0xfc10
1729
1723
; GFX11-NEXT: v_med3_i32 v1, s3, 0, 13
1730
1724
; GFX11-NEXT: v_readfirstlane_b32 s3, v0
1731
- ; GFX11-NEXT: s_lshl_b32 s8 , s1, 12
1725
+ ; GFX11-NEXT: s_lshl_b32 s7 , s1, 12
1732
1726
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1733
1727
; GFX11-NEXT: v_readfirstlane_b32 s6, v1
1734
1728
; GFX11-NEXT: s_or_b32 s2, s2, s3
1735
1729
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1736
1730
; GFX11-NEXT: s_or_b32 s3, s2, 0x1000
1737
- ; GFX11-NEXT: s_or_b32 s8 , s2, s8
1731
+ ; GFX11-NEXT: s_or_b32 s7 , s2, s7
1738
1732
; GFX11-NEXT: s_lshr_b32 s6, s3, s6
1739
1733
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1740
1734
; GFX11-NEXT: v_lshlrev_b32_e64 v0, v1, s6
@@ -1745,15 +1739,15 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
1745
1739
; GFX11-NEXT: v_readfirstlane_b32 s3, v0
1746
1740
; GFX11-NEXT: s_or_b32 s3, s6, s3
1747
1741
; GFX11-NEXT: s_cmp_lt_i32 s1, 1
1748
- ; GFX11-NEXT: s_cselect_b32 s3, s3, s8
1742
+ ; GFX11-NEXT: s_cselect_b32 s3, s3, s7
1749
1743
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1750
1744
; GFX11-NEXT: s_and_b32 s6, s3, 7
1751
1745
; GFX11-NEXT: s_cmp_gt_i32 s6, 5
1752
- ; GFX11-NEXT: s_cselect_b32 s8 , -1, 0
1746
+ ; GFX11-NEXT: s_cselect_b32 s7 , -1, 0
1753
1747
; GFX11-NEXT: s_cmp_eq_u32 s6, 3
1754
1748
; GFX11-NEXT: s_cselect_b32 s6, -1, 0
1755
1749
; GFX11-NEXT: s_lshr_b32 s3, s3, 2
1756
- ; GFX11-NEXT: s_or_b32 s6, s6, s8
1750
+ ; GFX11-NEXT: s_or_b32 s6, s6, s7
1757
1751
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1758
1752
; GFX11-NEXT: s_cmp_lg_u32 s6, 0
1759
1753
; GFX11-NEXT: s_addc_u32 s3, s3, 0
@@ -1764,15 +1758,11 @@ define amdgpu_kernel void @s_copysign_out_f16_mag_f64_sign_f16(ptr addrspace(1)
1764
1758
; GFX11-NEXT: s_cmpk_eq_i32 s1, 0x40f
1765
1759
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
1766
1760
; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0
1767
- ; GFX11-NEXT: s_lshr_b32 s1, s7, 16
1768
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1769
- ; GFX11-NEXT: s_and_b32 s1, s1, 0x8000
1770
- ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 9, v0
1771
1761
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1762
+ ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 9, v0
1772
1763
; GFX11-NEXT: v_or_b32_e32 v0, 0x7c00, v0
1773
- ; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo
1774
1764
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1775
- ; GFX11-NEXT: v_or_b32_e32 v0, s1 , v0
1765
+ ; GFX11-NEXT: v_cndmask_b32_e32 v0, s3 , v0, vcc_lo
1776
1766
; GFX11-NEXT: v_bfi_b32 v0, 0x7fff, v0, s0
1777
1767
; GFX11-NEXT: global_store_b16 v1, v0, s[4:5]
1778
1768
; GFX11-NEXT: s_nop 0
0 commit comments