@@ -21,9 +21,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
21
21
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
22
22
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
23
23
; GFX9-NEXT: s_and_b32 s0, s0, 15
24
+ ; GFX9-NEXT: s_add_i32 s1, s1, 0
24
25
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
25
26
; GFX9-NEXT: scratch_store_dword off, v0, s1
26
27
; GFX9-NEXT: s_waitcnt vmcnt(0)
28
+ ; GFX9-NEXT: s_add_i32 s0, s0, 0
27
29
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
28
30
; GFX9-NEXT: s_waitcnt vmcnt(0)
29
31
; GFX9-NEXT: s_endpgm
@@ -40,6 +42,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
40
42
; GFX10-NEXT: s_and_b32 s1, s0, 15
41
43
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
42
44
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
45
+ ; GFX10-NEXT: s_add_i32 s0, s0, 0
46
+ ; GFX10-NEXT: s_add_i32 s1, s1, 0
43
47
; GFX10-NEXT: scratch_store_dword off, v0, s0
44
48
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
45
49
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -53,6 +57,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
53
57
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
54
58
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
55
59
; GFX940-NEXT: s_and_b32 s0, s0, 15
60
+ ; GFX940-NEXT: s_add_i32 s1, s1, 0
56
61
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
57
62
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
58
63
; GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -70,6 +75,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
70
75
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
71
76
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
72
77
; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
78
+ ; GFX11-NEXT: s_add_i32 s0, s0, 0
73
79
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
74
80
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
75
81
; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -102,9 +108,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
102
108
; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0)
103
109
; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2
104
110
; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15
111
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s1, 0
105
112
; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2
106
113
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1
107
114
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
115
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 0
108
116
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
109
117
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
110
118
; UNALIGNED_GFX9-NEXT: s_endpgm
@@ -121,6 +129,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
121
129
; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15
122
130
; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2
123
131
; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2
132
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 0
133
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s1, 0
124
134
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0
125
135
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
126
136
; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -134,6 +144,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
134
144
; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0)
135
145
; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2
136
146
; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15
147
+ ; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s1, 0
137
148
; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2
138
149
; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
139
150
; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -151,6 +162,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
151
162
; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2
152
163
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
153
164
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
165
+ ; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 0
154
166
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
155
167
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
156
168
; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -1911,13 +1923,13 @@ define void @store_load_large_imm_offset_foo() {
1911
1923
; GFX9-LABEL: store_load_large_imm_offset_foo:
1912
1924
; GFX9: ; %bb.0: ; %bb
1913
1925
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1915
1926
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1916
- ; GFX9-NEXT: s_add_i32 s1, s32, s0
1927
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1928
+ ; GFX9-NEXT: s_add_i32 s1, s32, 4
1917
1929
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1918
1930
; GFX9-NEXT: s_waitcnt vmcnt(0)
1919
1931
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1920
- ; GFX9-NEXT: s_add_i32 s0, s1, 4
1932
+ ; GFX9-NEXT: s_add_i32 s0, s0, s1
1921
1933
; GFX9-NEXT: scratch_store_dword off, v0, s0
1922
1934
; GFX9-NEXT: s_waitcnt vmcnt(0)
1923
1935
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1928,10 +1940,10 @@ define void @store_load_large_imm_offset_foo() {
1928
1940
; GFX10: ; %bb.0: ; %bb
1929
1941
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1930
1942
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1931
- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1932
1943
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1933
- ; GFX10-NEXT: s_add_i32 s1, s32, s0
1934
- ; GFX10-NEXT: s_add_i32 s0, s1, 4
1944
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1945
+ ; GFX10-NEXT: s_add_i32 s1, s32, 4
1946
+ ; GFX10-NEXT: s_add_i32 s0, s0, s1
1935
1947
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
1936
1948
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1937
1949
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1987,13 +1999,13 @@ define void @store_load_large_imm_offset_foo() {
1987
1999
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
1988
2000
; UNALIGNED_GFX9: ; %bb.0: ; %bb
1989
2001
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1990
- ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
1991
2002
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
1992
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
2003
+ ; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2004
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, 4
1993
2005
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1994
2006
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1995
2007
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1996
- ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
2008
+ ; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, s1
1997
2009
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
1998
2010
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
1999
2011
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2004,10 +2016,10 @@ define void @store_load_large_imm_offset_foo() {
2004
2016
; UNALIGNED_GFX10: ; %bb.0: ; %bb
2005
2017
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006
2018
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2007
- ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2008
2019
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2009
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2010
- ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2020
+ ; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2021
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, 4
2022
+ ; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, s1
2011
2023
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
2012
2024
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2013
2025
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0
0 commit comments