Skip to content

Commit 393c380

Browse files
ronlieb authored and easyonaadit committed
Revert "AMDGPU: Handle folding frame indexes into s_add_i32 (llvm#101694)"
This reverts commit 8039886.

Change-Id: I7c75bacdc5174f56f6c2ac7bcbbd4c25be824a32
1 parent 7e25825 commit 393c380

File tree

7 files changed

+249
-977
lines changed

7 files changed

+249
-977
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 0 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -2445,94 +2445,7 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
24452445
MI->eraseFromParent();
24462446
return true;
24472447
}
2448-
case AMDGPU::S_ADD_I32: {
2449-
// TODO: Handle s_or_b32, s_and_b32.
2450-
unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;
2451-
MachineOperand &OtherOp = MI->getOperand(OtherOpIdx);
24522448

2453-
assert(FrameReg || MFI->isBottomOfStack());
2454-
2455-
MachineOperand &DstOp = MI->getOperand(0);
2456-
const DebugLoc &DL = MI->getDebugLoc();
2457-
Register MaterializedReg = FrameReg;
2458-
2459-
// Defend against live scc, which should never happen in practice.
2460-
bool DeadSCC = MI->getOperand(3).isDead();
2461-
2462-
Register TmpReg;
2463-
2464-
if (FrameReg && !ST.enableFlatScratch()) {
2465-
// FIXME: In the common case where the add does not also read its result
2466-
// (i.e. this isn't a reg += fi), it's not finding the dest reg as
2467-
// available.
2468-
TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass, MI,
2469-
false, 0);
2470-
BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_LSHR_B32))
2471-
.addDef(TmpReg, RegState::Renamable)
2472-
.addReg(FrameReg)
2473-
.addImm(ST.getWavefrontSizeLog2())
2474-
.setOperandDead(3); // Set SCC dead
2475-
MaterializedReg = TmpReg;
2476-
}
2477-
2478-
int64_t Offset = FrameInfo.getObjectOffset(Index);
2479-
2480-
// For the non-immediate case, we could fall through to the default
2481-
// handling, but we do an in-place update of the result register here to
2482-
// avoid scavenging another register.
2483-
if (OtherOp.isImm()) {
2484-
OtherOp.setImm(OtherOp.getImm() + Offset);
2485-
Offset = 0;
2486-
2487-
if (MaterializedReg)
2488-
FIOp.ChangeToRegister(MaterializedReg, false);
2489-
else
2490-
FIOp.ChangeToImmediate(0);
2491-
} else if (MaterializedReg) {
2492-
// If we can't fold the other operand, do another increment.
2493-
Register DstReg = DstOp.getReg();
2494-
2495-
if (!TmpReg && MaterializedReg == FrameReg) {
2496-
TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2497-
MI, false, 0);
2498-
DstReg = TmpReg;
2499-
}
2500-
2501-
auto AddI32 = BuildMI(*MBB, *MI, DL, TII->get(AMDGPU::S_ADD_I32))
2502-
.addDef(DstReg, RegState::Renamable)
2503-
.addReg(MaterializedReg, RegState::Kill)
2504-
.add(OtherOp);
2505-
if (DeadSCC)
2506-
AddI32.setOperandDead(3);
2507-
2508-
MaterializedReg = DstReg;
2509-
2510-
OtherOp.ChangeToRegister(MaterializedReg, false);
2511-
OtherOp.setIsKill(true);
2512-
OtherOp.setIsRenamable(true);
2513-
FIOp.ChangeToImmediate(Offset);
2514-
} else {
2515-
// If we don't have any other offset to apply, we can just directly
2516-
// interpret the frame index as the offset.
2517-
FIOp.ChangeToImmediate(Offset);
2518-
}
2519-
2520-
if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
2521-
assert(Offset == 0);
2522-
MI->removeOperand(3);
2523-
MI->removeOperand(OtherOpIdx);
2524-
MI->setDesc(TII->get(FIOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2525-
} else if (DeadSCC && FIOp.isImm() && FIOp.getImm() == 0) {
2526-
assert(Offset == 0);
2527-
MI->removeOperand(3);
2528-
MI->removeOperand(FIOperandNum);
2529-
MI->setDesc(
2530-
TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
2531-
}
2532-
2533-
assert(!FIOp.isFI());
2534-
return true;
2535-
}
25362449
default: {
25372450
// Other access to frame index
25382451
const DebugLoc &DL = MI->getDebugLoc();

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
2121
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2222
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
2323
; GFX9-NEXT: s_and_b32 s0, s0, 15
24+
; GFX9-NEXT: s_add_i32 s1, s1, 0
2425
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
2526
; GFX9-NEXT: scratch_store_dword off, v0, s1
2627
; GFX9-NEXT: s_waitcnt vmcnt(0)
28+
; GFX9-NEXT: s_add_i32 s0, s0, 0
2729
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
2830
; GFX9-NEXT: s_waitcnt vmcnt(0)
2931
; GFX9-NEXT: s_endpgm
@@ -40,6 +42,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
4042
; GFX10-NEXT: s_and_b32 s1, s0, 15
4143
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
4244
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
45+
; GFX10-NEXT: s_add_i32 s0, s0, 0
46+
; GFX10-NEXT: s_add_i32 s1, s1, 0
4347
; GFX10-NEXT: scratch_store_dword off, v0, s0
4448
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4549
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -53,6 +57,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
5357
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
5458
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
5559
; GFX940-NEXT: s_and_b32 s0, s0, 15
60+
; GFX940-NEXT: s_add_i32 s1, s1, 0
5661
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
5762
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
5863
; GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -70,6 +75,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
7075
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
7176
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7277
; GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
78+
; GFX11-NEXT: s_add_i32 s0, s0, 0
7379
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
7480
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
7581
; GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -102,9 +108,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
102108
; UNALIGNED_GFX9-NEXT: s_waitcnt lgkmcnt(0)
103109
; UNALIGNED_GFX9-NEXT: s_lshl_b32 s1, s0, 2
104110
; UNALIGNED_GFX9-NEXT: s_and_b32 s0, s0, 15
111+
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s1, 0
105112
; UNALIGNED_GFX9-NEXT: s_lshl_b32 s0, s0, 2
106113
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s1
107114
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
115+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 0
108116
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
109117
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
110118
; UNALIGNED_GFX9-NEXT: s_endpgm
@@ -121,6 +129,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
121129
; UNALIGNED_GFX10-NEXT: s_and_b32 s1, s0, 15
122130
; UNALIGNED_GFX10-NEXT: s_lshl_b32 s0, s0, 2
123131
; UNALIGNED_GFX10-NEXT: s_lshl_b32 s1, s1, 2
132+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 0
133+
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s1, 0
124134
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s0
125135
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
126136
; UNALIGNED_GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -134,6 +144,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
134144
; UNALIGNED_GFX940-NEXT: s_waitcnt lgkmcnt(0)
135145
; UNALIGNED_GFX940-NEXT: s_lshl_b32 s1, s0, 2
136146
; UNALIGNED_GFX940-NEXT: s_and_b32 s0, s0, 15
147+
; UNALIGNED_GFX940-NEXT: s_add_i32 s1, s1, 0
137148
; UNALIGNED_GFX940-NEXT: s_lshl_b32 s0, s0, 2
138149
; UNALIGNED_GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
139150
; UNALIGNED_GFX940-NEXT: s_waitcnt vmcnt(0)
@@ -151,6 +162,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
151162
; UNALIGNED_GFX11-NEXT: s_lshl_b32 s1, s1, 2
152163
; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
153164
; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 15 :: v_dual_mov_b32 v1, s1
165+
; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 0
154166
; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
155167
; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0
156168
; UNALIGNED_GFX11-NEXT: scratch_load_b32 v0, v1, off glc dlc
@@ -1911,13 +1923,13 @@ define void @store_load_large_imm_offset_foo() {
19111923
; GFX9-LABEL: store_load_large_imm_offset_foo:
19121924
; GFX9: ; %bb.0: ; %bb
19131925
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914-
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
19151926
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1916-
; GFX9-NEXT: s_add_i32 s1, s32, s0
1927+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1928+
; GFX9-NEXT: s_add_i32 s1, s32, 4
19171929
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19181930
; GFX9-NEXT: s_waitcnt vmcnt(0)
19191931
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1920-
; GFX9-NEXT: s_add_i32 s0, s1, 4
1932+
; GFX9-NEXT: s_add_i32 s0, s0, s1
19211933
; GFX9-NEXT: scratch_store_dword off, v0, s0
19221934
; GFX9-NEXT: s_waitcnt vmcnt(0)
19231935
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1928,10 +1940,10 @@ define void @store_load_large_imm_offset_foo() {
19281940
; GFX10: ; %bb.0: ; %bb
19291941
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19301942
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1931-
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
19321943
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1933-
; GFX10-NEXT: s_add_i32 s1, s32, s0
1934-
; GFX10-NEXT: s_add_i32 s0, s1, 4
1944+
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1945+
; GFX10-NEXT: s_add_i32 s1, s32, 4
1946+
; GFX10-NEXT: s_add_i32 s0, s0, s1
19351947
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
19361948
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
19371949
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1987,13 +1999,13 @@ define void @store_load_large_imm_offset_foo() {
19871999
; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo:
19882000
; UNALIGNED_GFX9: ; %bb.0: ; %bb
19892001
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1990-
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
19912002
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13
1992-
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0
2003+
; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80
2004+
; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, 4
19932005
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
19942006
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19952007
; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15
1996-
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4
2008+
; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, s1
19972009
; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0
19982010
; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0)
19992011
; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -2004,10 +2016,10 @@ define void @store_load_large_imm_offset_foo() {
20042016
; UNALIGNED_GFX10: ; %bb.0: ; %bb
20052017
; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20062018
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13
2007-
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
20082019
; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15
2009-
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0
2010-
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4
2020+
; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80
2021+
; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, 4
2022+
; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, s1
20112023
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
20122024
; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20132025
; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0

0 commit comments

Comments
 (0)