@@ -1791,25 +1791,21 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
1791
1791
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1792
1792
; GFX10-NEXT: v_and_b32_e32 v4, 0x3ff, v31
1793
1793
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 2, v4
1794
- ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
1795
- ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
1796
1794
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v4
1797
1795
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
1798
- ; GFX10-NEXT: global_load_dword v4, v[2:3], off
1799
- ; GFX10-NEXT: global_load_dword v9, v[0:1], off
1796
+ ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
1797
+ ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
1798
+ ; GFX10-NEXT: global_load_dword v4, v[0:1], off
1799
+ ; GFX10-NEXT: global_load_dword v9, v[2:3], off
1800
1800
; GFX10-NEXT: s_waitcnt vmcnt(1)
1801
- ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4
1802
- ; GFX10-NEXT: s_waitcnt vmcnt(0)
1803
- ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v9
1804
- ; GFX10-NEXT: v_ashrrev_i16 v2, 8, v9
1805
- ; GFX10-NEXT: v_ashrrev_i16 v3, 8, v4
1806
- ; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x6010205
1807
- ; GFX10-NEXT: v_bfe_i32 v10, v0, 0, 8
1808
- ; GFX10-NEXT: v_bfe_i32 v1, v1, 0, 8
1809
- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1810
- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1811
- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1812
- ; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1801
+ ; GFX10-NEXT: v_ashrrev_i16 v0, 8, v4
1802
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1803
+ ; GFX10-NEXT: v_ashrrev_i16 v10, 8, v9
1804
+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1805
+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1806
+ ; GFX10-NEXT: v_perm_b32 v4, v9, v4, 0x6010205
1807
+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1808
+ ; GFX10-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1813
1809
; GFX10-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
1814
1810
; GFX10-NEXT: global_store_dword v[5:6], v4, off
1815
1811
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1821,24 +1817,20 @@ define hidden void @sitofp_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in
1821
1817
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v4
1822
1818
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4
1823
1819
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1824
- ; GFX9-NEXT: global_load_dword v9, v[0:1], off
1825
- ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v4
1826
- ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
1820
+ ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
1821
+ ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
1827
1822
; GFX9-NEXT: global_load_dword v4, v[0:1], off
1823
+ ; GFX9-NEXT: global_load_dword v9, v[2:3], off
1828
1824
; GFX9-NEXT: s_mov_b32 s4, 0x6010205
1829
1825
; GFX9-NEXT: s_waitcnt vmcnt(1)
1830
- ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v9
1831
- ; GFX9-NEXT: v_ashrrev_i16_e32 v1, 8, v9
1832
- ; GFX9-NEXT: v_bfe_i32 v10, v0, 0, 8
1833
- ; GFX9-NEXT: s_waitcnt vmcnt(0)
1834
- ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4
1835
- ; GFX9-NEXT: v_ashrrev_i16_e32 v3, 8, v4
1836
- ; GFX9-NEXT: v_bfe_i32 v11, v2, 0, 8
1837
- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1838
- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1839
- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v11) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1840
- ; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1841
- ; GFX9-NEXT: v_perm_b32 v4, v4, v9, s4
1826
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v4
1827
+ ; GFX9-NEXT: s_waitcnt vmcnt(0)
1828
+ ; GFX9-NEXT: v_ashrrev_i16_e32 v10, 8, v9
1829
+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1830
+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2
1831
+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1832
+ ; GFX9-NEXT: v_cvt_f32_i32_sdwa v0, sext(v10) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
1833
+ ; GFX9-NEXT: v_perm_b32 v4, v9, v4, s4
1842
1834
; GFX9-NEXT: global_store_dwordx4 v[7:8], v[0:3], off
1843
1835
; GFX9-NEXT: global_store_dword v[5:6], v4, off
1844
1836
; GFX9-NEXT: s_waitcnt vmcnt(0)
0 commit comments