Skip to content

Commit 79354df

Browse files
vg0204 authored and AlexisPerry committed
Revert "[AMDGPU]Optimize SGPR spills (llvm#93668)"
This reverts commit 4b9112e. A separate issue (llvm#96353) describing the problem has been opened to keep track of it.
1 parent 5704654 commit 79354df

7 files changed

+102
-126
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1437,11 +1437,6 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14371437
// since FastRegAlloc does the replacements itself.
14381438
addPass(createVirtRegRewriter(false));
14391439

1440-
// At this point, the sgpr-regalloc has been done and it is good to have the
1441-
// stack slot coloring to try to optimize the SGPR spill stack indices before
1442-
// attempting the custom SGPR spill lowering.
1443-
addPass(&StackSlotColoringID);
1444-
14451440
// Equivalent of PEI for SGPRs.
14461441
addPass(&SILowerSGPRSpillsID);
14471442
addPass(&SIPreAllocateWWMRegsID);

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1775,13 +1775,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
17751775

17761776
if (SpillToVGPR) {
17771777

1778-
// Since stack slot coloring pass is trying to optimize SGPR spills,
1779-
// VGPR lanes (mapped from spill stack slot) may be shared for SGPR
1780-
// spills of different sizes. This accounts for number of VGPR lanes alloted
1781-
// equal to the largest SGPR being spilled in them.
1782-
assert(SB.NumSubRegs <= VGPRSpills.size() &&
1783-
"Num of SGPRs spilled should be less than or equal to num of "
1784-
"the VGPR lanes.");
1778+
assert(SB.NumSubRegs == VGPRSpills.size() &&
1779+
"Num of VGPR lanes should be equal to num of SGPRs spilled");
17851780

17861781
for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
17871782
Register SubReg =

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -366,12 +366,10 @@
366366
; GCN-O1-NEXT: Machine Optimization Remark Emitter
367367
; GCN-O1-NEXT: Greedy Register Allocator
368368
; GCN-O1-NEXT: Virtual Register Rewriter
369-
; GCN-O1-NEXT: Stack Slot Coloring
370369
; GCN-O1-NEXT: SI lower SGPR spill instructions
371370
; GCN-O1-NEXT: Virtual Register Map
372371
; GCN-O1-NEXT: Live Register Matrix
373372
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
374-
; GCN-O1-NEXT: Live Stack Slot Analysis
375373
; GCN-O1-NEXT: Greedy Register Allocator
376374
; GCN-O1-NEXT: SI Lower WWM Copies
377375
; GCN-O1-NEXT: GCN NSA Reassign
@@ -673,12 +671,10 @@
673671
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
674672
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
675673
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
676-
; GCN-O1-OPTS-NEXT: Stack Slot Coloring
677674
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
678675
; GCN-O1-OPTS-NEXT: Virtual Register Map
679676
; GCN-O1-OPTS-NEXT: Live Register Matrix
680677
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
681-
; GCN-O1-OPTS-NEXT: Live Stack Slot Analysis
682678
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
683679
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
684680
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
@@ -986,12 +982,10 @@
986982
; GCN-O2-NEXT: Machine Optimization Remark Emitter
987983
; GCN-O2-NEXT: Greedy Register Allocator
988984
; GCN-O2-NEXT: Virtual Register Rewriter
989-
; GCN-O2-NEXT: Stack Slot Coloring
990985
; GCN-O2-NEXT: SI lower SGPR spill instructions
991986
; GCN-O2-NEXT: Virtual Register Map
992987
; GCN-O2-NEXT: Live Register Matrix
993988
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
994-
; GCN-O2-NEXT: Live Stack Slot Analysis
995989
; GCN-O2-NEXT: Greedy Register Allocator
996990
; GCN-O2-NEXT: SI Lower WWM Copies
997991
; GCN-O2-NEXT: GCN NSA Reassign
@@ -1311,12 +1305,10 @@
13111305
; GCN-O3-NEXT: Machine Optimization Remark Emitter
13121306
; GCN-O3-NEXT: Greedy Register Allocator
13131307
; GCN-O3-NEXT: Virtual Register Rewriter
1314-
; GCN-O3-NEXT: Stack Slot Coloring
13151308
; GCN-O3-NEXT: SI lower SGPR spill instructions
13161309
; GCN-O3-NEXT: Virtual Register Map
13171310
; GCN-O3-NEXT: Live Register Matrix
13181311
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
1319-
; GCN-O3-NEXT: Live Stack Slot Analysis
13201312
; GCN-O3-NEXT: Greedy Register Allocator
13211313
; GCN-O3-NEXT: SI Lower WWM Copies
13221314
; GCN-O3-NEXT: GCN NSA Reassign

llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -221,15 +221,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
221221
; GFX906-NEXT: ; def s29
222222
; GFX906-NEXT: ;;#ASMEND
223223
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
224-
; GFX906-NEXT: v_writelane_b32 v40, s21, 12
225-
; GFX906-NEXT: v_writelane_b32 v40, s22, 13
226-
; GFX906-NEXT: v_writelane_b32 v40, s23, 14
227-
; GFX906-NEXT: v_writelane_b32 v40, s24, 15
228-
; GFX906-NEXT: v_writelane_b32 v40, s25, 16
229-
; GFX906-NEXT: v_writelane_b32 v40, s26, 17
230-
; GFX906-NEXT: v_writelane_b32 v40, s27, 18
231-
; GFX906-NEXT: v_writelane_b32 v40, s28, 19
232-
; GFX906-NEXT: v_writelane_b32 v40, s29, 20
224+
; GFX906-NEXT: v_writelane_b32 v40, s21, 24
225+
; GFX906-NEXT: v_writelane_b32 v40, s22, 25
226+
; GFX906-NEXT: v_writelane_b32 v40, s23, 26
227+
; GFX906-NEXT: v_writelane_b32 v40, s24, 27
228+
; GFX906-NEXT: v_writelane_b32 v40, s25, 28
229+
; GFX906-NEXT: v_writelane_b32 v40, s26, 29
230+
; GFX906-NEXT: v_writelane_b32 v40, s27, 30
231+
; GFX906-NEXT: v_writelane_b32 v40, s28, 31
232+
; GFX906-NEXT: v_writelane_b32 v40, s29, 32
233233
; GFX906-NEXT: v_readlane_b32 s4, v40, 10
234234
; GFX906-NEXT: v_readlane_b32 s6, v40, 0
235235
; GFX906-NEXT: v_readlane_b32 s8, v40, 8
@@ -249,39 +249,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
249249
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
250250
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
251251
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
252-
; GFX906-NEXT: v_readlane_b32 s21, v40, 12
252+
; GFX906-NEXT: v_readlane_b32 s21, v40, 24
253253
; GFX906-NEXT: ;;#ASMSTART
254254
; GFX906-NEXT: ; use s21
255255
; GFX906-NEXT: ;;#ASMEND
256-
; GFX906-NEXT: v_readlane_b32 s22, v40, 13
256+
; GFX906-NEXT: v_readlane_b32 s22, v40, 25
257257
; GFX906-NEXT: ;;#ASMSTART
258258
; GFX906-NEXT: ; use s22
259259
; GFX906-NEXT: ;;#ASMEND
260-
; GFX906-NEXT: v_readlane_b32 s23, v40, 14
260+
; GFX906-NEXT: v_readlane_b32 s23, v40, 26
261261
; GFX906-NEXT: ;;#ASMSTART
262262
; GFX906-NEXT: ; use s23
263263
; GFX906-NEXT: ;;#ASMEND
264-
; GFX906-NEXT: v_readlane_b32 s24, v40, 15
264+
; GFX906-NEXT: v_readlane_b32 s24, v40, 27
265265
; GFX906-NEXT: ;;#ASMSTART
266266
; GFX906-NEXT: ; use s24
267267
; GFX906-NEXT: ;;#ASMEND
268-
; GFX906-NEXT: v_readlane_b32 s25, v40, 16
268+
; GFX906-NEXT: v_readlane_b32 s25, v40, 28
269269
; GFX906-NEXT: ;;#ASMSTART
270270
; GFX906-NEXT: ; use s25
271271
; GFX906-NEXT: ;;#ASMEND
272-
; GFX906-NEXT: v_readlane_b32 s26, v40, 17
272+
; GFX906-NEXT: v_readlane_b32 s26, v40, 29
273273
; GFX906-NEXT: ;;#ASMSTART
274274
; GFX906-NEXT: ; use s26
275275
; GFX906-NEXT: ;;#ASMEND
276-
; GFX906-NEXT: v_readlane_b32 s27, v40, 18
276+
; GFX906-NEXT: v_readlane_b32 s27, v40, 30
277277
; GFX906-NEXT: ;;#ASMSTART
278278
; GFX906-NEXT: ; use s27
279279
; GFX906-NEXT: ;;#ASMEND
280-
; GFX906-NEXT: v_readlane_b32 s28, v40, 19
280+
; GFX906-NEXT: v_readlane_b32 s28, v40, 31
281281
; GFX906-NEXT: ;;#ASMSTART
282282
; GFX906-NEXT: ; use s28
283283
; GFX906-NEXT: ;;#ASMEND
284-
; GFX906-NEXT: v_readlane_b32 s29, v40, 20
284+
; GFX906-NEXT: v_readlane_b32 s29, v40, 32
285285
; GFX906-NEXT: ;;#ASMSTART
286286
; GFX906-NEXT: ; use s29
287287
; GFX906-NEXT: ;;#ASMEND
@@ -602,15 +602,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
602602
; GFX908-NEXT: ; def s29
603603
; GFX908-NEXT: ;;#ASMEND
604604
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
605-
; GFX908-NEXT: v_writelane_b32 v40, s21, 12
606-
; GFX908-NEXT: v_writelane_b32 v40, s22, 13
607-
; GFX908-NEXT: v_writelane_b32 v40, s23, 14
608-
; GFX908-NEXT: v_writelane_b32 v40, s24, 15
609-
; GFX908-NEXT: v_writelane_b32 v40, s25, 16
610-
; GFX908-NEXT: v_writelane_b32 v40, s26, 17
611-
; GFX908-NEXT: v_writelane_b32 v40, s27, 18
612-
; GFX908-NEXT: v_writelane_b32 v40, s28, 19
613-
; GFX908-NEXT: v_writelane_b32 v40, s29, 20
605+
; GFX908-NEXT: v_writelane_b32 v40, s21, 24
606+
; GFX908-NEXT: v_writelane_b32 v40, s22, 25
607+
; GFX908-NEXT: v_writelane_b32 v40, s23, 26
608+
; GFX908-NEXT: v_writelane_b32 v40, s24, 27
609+
; GFX908-NEXT: v_writelane_b32 v40, s25, 28
610+
; GFX908-NEXT: v_writelane_b32 v40, s26, 29
611+
; GFX908-NEXT: v_writelane_b32 v40, s27, 30
612+
; GFX908-NEXT: v_writelane_b32 v40, s28, 31
613+
; GFX908-NEXT: v_writelane_b32 v40, s29, 32
614614
; GFX908-NEXT: v_readlane_b32 s4, v40, 10
615615
; GFX908-NEXT: v_readlane_b32 s6, v40, 0
616616
; GFX908-NEXT: v_readlane_b32 s8, v40, 8
@@ -630,39 +630,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
630630
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
631631
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
632632
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
633-
; GFX908-NEXT: v_readlane_b32 s21, v40, 12
633+
; GFX908-NEXT: v_readlane_b32 s21, v40, 24
634634
; GFX908-NEXT: ;;#ASMSTART
635635
; GFX908-NEXT: ; use s21
636636
; GFX908-NEXT: ;;#ASMEND
637-
; GFX908-NEXT: v_readlane_b32 s22, v40, 13
637+
; GFX908-NEXT: v_readlane_b32 s22, v40, 25
638638
; GFX908-NEXT: ;;#ASMSTART
639639
; GFX908-NEXT: ; use s22
640640
; GFX908-NEXT: ;;#ASMEND
641-
; GFX908-NEXT: v_readlane_b32 s23, v40, 14
641+
; GFX908-NEXT: v_readlane_b32 s23, v40, 26
642642
; GFX908-NEXT: ;;#ASMSTART
643643
; GFX908-NEXT: ; use s23
644644
; GFX908-NEXT: ;;#ASMEND
645-
; GFX908-NEXT: v_readlane_b32 s24, v40, 15
645+
; GFX908-NEXT: v_readlane_b32 s24, v40, 27
646646
; GFX908-NEXT: ;;#ASMSTART
647647
; GFX908-NEXT: ; use s24
648648
; GFX908-NEXT: ;;#ASMEND
649-
; GFX908-NEXT: v_readlane_b32 s25, v40, 16
649+
; GFX908-NEXT: v_readlane_b32 s25, v40, 28
650650
; GFX908-NEXT: ;;#ASMSTART
651651
; GFX908-NEXT: ; use s25
652652
; GFX908-NEXT: ;;#ASMEND
653-
; GFX908-NEXT: v_readlane_b32 s26, v40, 17
653+
; GFX908-NEXT: v_readlane_b32 s26, v40, 29
654654
; GFX908-NEXT: ;;#ASMSTART
655655
; GFX908-NEXT: ; use s26
656656
; GFX908-NEXT: ;;#ASMEND
657-
; GFX908-NEXT: v_readlane_b32 s27, v40, 18
657+
; GFX908-NEXT: v_readlane_b32 s27, v40, 30
658658
; GFX908-NEXT: ;;#ASMSTART
659659
; GFX908-NEXT: ; use s27
660660
; GFX908-NEXT: ;;#ASMEND
661-
; GFX908-NEXT: v_readlane_b32 s28, v40, 19
661+
; GFX908-NEXT: v_readlane_b32 s28, v40, 31
662662
; GFX908-NEXT: ;;#ASMSTART
663663
; GFX908-NEXT: ; use s28
664664
; GFX908-NEXT: ;;#ASMEND
665-
; GFX908-NEXT: v_readlane_b32 s29, v40, 20
665+
; GFX908-NEXT: v_readlane_b32 s29, v40, 32
666666
; GFX908-NEXT: ;;#ASMSTART
667667
; GFX908-NEXT: ; use s29
668668
; GFX908-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,10 @@
1717

1818
; DEFAULT: Greedy Register Allocator
1919
; DEFAULT-NEXT: Virtual Register Rewriter
20-
; DEFAULT-NEXT: Stack Slot Coloring
2120
; DEFAULT-NEXT: SI lower SGPR spill instructions
2221
; DEFAULT-NEXT: Virtual Register Map
2322
; DEFAULT-NEXT: Live Register Matrix
2423
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
25-
; DEFAULT-NEXT: Live Stack Slot Analysis
2624
; DEFAULT-NEXT: Greedy Register Allocator
2725
; DEFAULT-NEXT: SI Lower WWM Copies
2826
; DEFAULT-NEXT: GCN NSA Reassign
@@ -52,12 +50,10 @@
5250
; BASIC-DEFAULT-NEXT: Live Register Matrix
5351
; BASIC-DEFAULT-NEXT: Basic Register Allocator
5452
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
55-
; BASIC-DEFAULT-NEXT: Stack Slot Coloring
5653
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
5754
; BASIC-DEFAULT-NEXT: Virtual Register Map
5855
; BASIC-DEFAULT-NEXT: Live Register Matrix
5956
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
60-
; BASIC-DEFAULT-NEXT: Live Stack Slot Analysis
6157
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
6258
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
6359
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
@@ -73,12 +69,10 @@
7369

7470
; DEFAULT-BASIC: Greedy Register Allocator
7571
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
76-
; DEFAULT-BASIC-NEXT: Stack Slot Coloring
7772
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
7873
; DEFAULT-BASIC-NEXT: Virtual Register Map
7974
; DEFAULT-BASIC-NEXT: Live Register Matrix
8075
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
81-
; DEFAULT-BASIC-NEXT: Live Stack Slot Analysis
8276
; DEFAULT-BASIC-NEXT: Basic Register Allocator
8377
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
8478
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
@@ -96,12 +90,10 @@
9690
; BASIC-BASIC-NEXT: Live Register Matrix
9791
; BASIC-BASIC-NEXT: Basic Register Allocator
9892
; BASIC-BASIC-NEXT: Virtual Register Rewriter
99-
; BASIC-BASIC-NEXT: Stack Slot Coloring
10093
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
10194
; BASIC-BASIC-NEXT: Virtual Register Map
10295
; BASIC-BASIC-NEXT: Live Register Matrix
10396
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
104-
; BASIC-BASIC-NEXT: Live Stack Slot Analysis
10597
; BASIC-BASIC-NEXT: Basic Register Allocator
10698
; BASIC-BASIC-NEXT: SI Lower WWM Copies
10799
; BASIC-BASIC-NEXT: GCN NSA Reassign

0 commit comments

Comments
 (0)