@@ -1769,9 +1769,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1769
1769
; GFX9-NEXT: s_mov_b32 s0, 0
1770
1770
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
1771
1771
; GFX9-NEXT: s_waitcnt vmcnt(0)
1772
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1773
1772
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1774
- ; GFX9-NEXT: s_add_i32 s0, s0, 4
1773
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e84
1775
1774
; GFX9-NEXT: scratch_store_dword off, v0, s0
1776
1775
; GFX9-NEXT: s_waitcnt vmcnt(0)
1777
1776
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1786,8 +1785,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1786
1785
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1787
1786
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1788
1787
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1789
- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1790
- ; GFX10-NEXT: s_add_i32 s0, s0, 4
1788
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e84
1791
1789
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
1792
1790
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1793
1791
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1799,11 +1797,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1799
1797
; GFX942-LABEL: store_load_large_imm_offset_kernel:
1800
1798
; GFX942: ; %bb.0: ; %bb
1801
1799
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1802
- ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
1803
1800
; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
1804
1801
; GFX942-NEXT: s_waitcnt vmcnt(0)
1805
1802
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1806
- ; GFX942-NEXT: s_add_i32 s0, s0, 4
1803
+ ; GFX942-NEXT: s_movk_i32 s0, 0x3e84
1807
1804
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1808
1805
; GFX942-NEXT: s_waitcnt vmcnt(0)
1809
1806
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1813,9 +1810,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1813
1810
; GFX11-LABEL: store_load_large_imm_offset_kernel:
1814
1811
; GFX11: ; %bb.0: ; %bb
1815
1812
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1816
- ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1817
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1818
- ; GFX11-NEXT: s_add_i32 s0, s0, 4
1813
+ ; GFX11-NEXT: s_movk_i32 s0, 0x3e84
1819
1814
; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
1820
1815
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1821
1816
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1843,9 +1838,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1843
1838
; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0
1844
1839
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4
1845
1840
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1846
- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
1847
1841
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1848
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4
1842
+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84
1849
1843
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
1850
1844
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1851
1845
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1860,8 +1854,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1860
1854
; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
1861
1855
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
1862
1856
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
1863
- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
1864
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4
1857
+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84
1865
1858
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4
1866
1859
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1867
1860
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1873,11 +1866,10 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1873
1866
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel:
1874
1867
; UNALIGNED_GFX942: ; %bb.0: ; %bb
1875
1868
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
1876
- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
1877
1869
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1
1878
1870
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
1879
1871
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
1880
- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4
1872
+ ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84
1881
1873
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1882
1874
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
1883
1875
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1887,9 +1879,7 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
1887
1879
; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel:
1888
1880
; UNALIGNED_GFX11: ; %bb.0: ; %bb
1889
1881
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1890
- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
1891
- ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1892
- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4
1882
+ ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84
1893
1883
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc
1894
1884
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1895
1885
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -1923,13 +1913,11 @@ define void @store_load_large_imm_offset_foo() {
1923
1913
; GFX9-LABEL: store_load_large_imm_offset_foo:
1924
1914
; GFX9: ; %bb.0: ; %bb
1925
1915
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1927
1916
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1928
- ; GFX9-NEXT: s_add_i32 s1, s32, s0
1929
1917
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1930
1918
; GFX9-NEXT: s_waitcnt vmcnt(0)
1931
1919
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1932
- ; GFX9-NEXT: s_add_i32 s0, s1, 4
1920
+ ; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
1933
1921
; GFX9-NEXT: scratch_store_dword off, v0, s0
1934
1922
; GFX9-NEXT: s_waitcnt vmcnt(0)
1935
1923
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1940,10 +1928,8 @@ define void @store_load_large_imm_offset_foo() {
1940
1928
; GFX10: ; %bb.0: ; %bb
1941
1929
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942
1930
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1943
- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1944
1931
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1945
- ; GFX10-NEXT: s_add_i32 s1, s32, s0
1946
- ; GFX10-NEXT: s_add_i32 s0, s1, 4
1932
+ ; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
1947
1933
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
1948
1934
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1949
1935
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1955,13 +1941,11 @@ define void @store_load_large_imm_offset_foo() {
1955
1941
; GFX942-LABEL: store_load_large_imm_offset_foo:
1956
1942
; GFX942: ; %bb.0: ; %bb
1957
1943
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958
- ; GFX942-NEXT: s_movk_i32 s0, 0x3e80
1959
1944
; GFX942-NEXT: v_mov_b32_e32 v0, 13
1960
- ; GFX942-NEXT: s_add_i32 s1, s32, s0
1961
1945
; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
1962
1946
; GFX942-NEXT: s_waitcnt vmcnt(0)
1963
1947
; GFX942-NEXT: v_mov_b32_e32 v0, 15
1964
- ; GFX942-NEXT: s_add_i32 s0, s1, 4
1948
+ ; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
1965
1949
; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1966
1950
; GFX942-NEXT: s_waitcnt vmcnt(0)
1967
1951
; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1972,10 +1956,7 @@ define void @store_load_large_imm_offset_foo() {
1972
1956
; GFX11: ; %bb.0: ; %bb
1973
1957
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1974
1958
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1975
- ; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1976
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1977
- ; GFX11-NEXT: s_add_i32 s1, s32, s0
1978
- ; GFX11-NEXT: s_add_i32 s0, s1, 4
1959
+ ; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
1979
1960
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
1980
1961
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1981
1962
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
@@ -2004,13 +1985,11 @@ define void @store_load_large_imm_offset_foo() {
2004
1985
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
2005
1986
; UNALIGNED_GFX9: ; %bb.0: ; %bb
2006
1987
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007
- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2008
1988
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
2009
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
2010
1989
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
2011
1990
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
2012
1991
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
2013
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
1992
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84
2014
1993
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
2015
1994
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
2016
1995
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2021,10 +2000,8 @@ define void @store_load_large_imm_offset_foo() {
2021
2000
; UNALIGNED_GFX10: ; %bb.0: ; %bb
2022
2001
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2023
2002
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2024
- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2025
2003
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2026
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2027
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2004
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84
2028
2005
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
2029
2006
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2030
2007
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -2036,13 +2013,11 @@ define void @store_load_large_imm_offset_foo() {
2036
2013
; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo:
2037
2014
; UNALIGNED_GFX942: ; %bb.0: ; %bb
2038
2015
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039
- ; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80
2040
2016
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13
2041
- ; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0
2042
2017
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
2043
2018
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2044
2019
; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15
2045
- ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4
2020
+ ; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84
2046
2021
; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
2047
2022
; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0)
2048
2023
; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -2053,10 +2028,7 @@ define void @store_load_large_imm_offset_foo() {
2053
2028
; UNALIGNED_GFX11: ; %bb.0: ; %bb
2054
2029
; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2055
2030
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
2056
- ; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80
2057
- ; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2058
- ; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0
2059
- ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4
2031
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84
2060
2032
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
2061
2033
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
2062
2034
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments