Skip to content

Commit 870bdc6

Browse files
committed
Reapply "[AMDGPU]Optimize SGPR spills (llvm#93668)"
This reverts commit c2fc7f7, now that the dependent patch splitting the VGPR regalloc pipeline has resolved the issue (llvm#96353).
1 parent 6c331e5 commit 870bdc6

7 files changed

+126
-102
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,6 +1533,11 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
15331533
// since FastRegAlloc does the replacements itself.
15341534
addPass(createVirtRegRewriter(false));
15351535

1536+
// At this point, the sgpr-regalloc has been done and it is good to have the
1537+
// stack slot coloring to try to optimize the SGPR spill stack indices before
1538+
// attempting the custom SGPR spill lowering.
1539+
addPass(&StackSlotColoringID);
1540+
15361541
// Equivalent of PEI for SGPRs.
15371542
addPass(&SILowerSGPRSpillsLegacyID);
15381543

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1959,8 +1959,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
19591959

19601960
if (SpillToVGPR) {
19611961

1962-
assert(SB.NumSubRegs == VGPRSpills.size() &&
1963-
"Num of VGPR lanes should be equal to num of SGPRs spilled");
1962+
// Since stack slot coloring pass is trying to optimize SGPR spills,
1963+
// VGPR lanes (mapped from spill stack slot) may be shared for SGPR
1964+
// spills of different sizes. This accounts for the number of VGPR lanes allotted
1965+
// equal to the largest SGPR being spilled in them.
1966+
assert(SB.NumSubRegs <= VGPRSpills.size() &&
1967+
"Num of SGPRs spilled should be less than or equal to num of "
1968+
"the VGPR lanes.");
19641969

19651970
for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
19661971
Register SubReg =

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,10 +366,12 @@
366366
; GCN-O1-NEXT: Machine Optimization Remark Emitter
367367
; GCN-O1-NEXT: Greedy Register Allocator
368368
; GCN-O1-NEXT: Virtual Register Rewriter
369+
; GCN-O1-NEXT: Stack Slot Coloring
369370
; GCN-O1-NEXT: SI lower SGPR spill instructions
370371
; GCN-O1-NEXT: Virtual Register Map
371372
; GCN-O1-NEXT: Live Register Matrix
372373
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
374+
; GCN-O1-NEXT: Live Stack Slot Analysis
373375
; GCN-O1-NEXT: Greedy Register Allocator
374376
; GCN-O1-NEXT: SI Lower WWM Copies
375377
; GCN-O1-NEXT: Virtual Register Rewriter
@@ -674,10 +676,12 @@
674676
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
675677
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
676678
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
679+
; GCN-O1-OPTS-NEXT: Stack Slot Coloring
677680
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
678681
; GCN-O1-OPTS-NEXT: Virtual Register Map
679682
; GCN-O1-OPTS-NEXT: Live Register Matrix
680683
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
684+
; GCN-O1-OPTS-NEXT: Live Stack Slot Analysis
681685
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
682686
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
683687
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
@@ -988,10 +992,12 @@
988992
; GCN-O2-NEXT: Machine Optimization Remark Emitter
989993
; GCN-O2-NEXT: Greedy Register Allocator
990994
; GCN-O2-NEXT: Virtual Register Rewriter
995+
; GCN-O2-NEXT: Stack Slot Coloring
991996
; GCN-O2-NEXT: SI lower SGPR spill instructions
992997
; GCN-O2-NEXT: Virtual Register Map
993998
; GCN-O2-NEXT: Live Register Matrix
994999
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
1000+
; GCN-O2-NEXT: Live Stack Slot Analysis
9951001
; GCN-O2-NEXT: Greedy Register Allocator
9961002
; GCN-O2-NEXT: SI Lower WWM Copies
9971003
; GCN-O2-NEXT: Virtual Register Rewriter
@@ -1314,10 +1320,12 @@
13141320
; GCN-O3-NEXT: Machine Optimization Remark Emitter
13151321
; GCN-O3-NEXT: Greedy Register Allocator
13161322
; GCN-O3-NEXT: Virtual Register Rewriter
1323+
; GCN-O3-NEXT: Stack Slot Coloring
13171324
; GCN-O3-NEXT: SI lower SGPR spill instructions
13181325
; GCN-O3-NEXT: Virtual Register Map
13191326
; GCN-O3-NEXT: Live Register Matrix
13201327
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
1328+
; GCN-O3-NEXT: Live Stack Slot Analysis
13211329
; GCN-O3-NEXT: Greedy Register Allocator
13221330
; GCN-O3-NEXT: SI Lower WWM Copies
13231331
; GCN-O3-NEXT: Virtual Register Rewriter

llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -196,39 +196,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
196196
; GFX906-NEXT: ;;#ASMSTART
197197
; GFX906-NEXT: ; def s21
198198
; GFX906-NEXT: ;;#ASMEND
199-
; GFX906-NEXT: v_writelane_b32 v39, s21, 24
199+
; GFX906-NEXT: v_writelane_b32 v39, s21, 12
200200
; GFX906-NEXT: ;;#ASMSTART
201201
; GFX906-NEXT: ; def s22
202202
; GFX906-NEXT: ;;#ASMEND
203-
; GFX906-NEXT: v_writelane_b32 v39, s22, 25
203+
; GFX906-NEXT: v_writelane_b32 v39, s22, 13
204204
; GFX906-NEXT: ;;#ASMSTART
205205
; GFX906-NEXT: ; def s23
206206
; GFX906-NEXT: ;;#ASMEND
207-
; GFX906-NEXT: v_writelane_b32 v39, s23, 26
207+
; GFX906-NEXT: v_writelane_b32 v39, s23, 14
208208
; GFX906-NEXT: ;;#ASMSTART
209209
; GFX906-NEXT: ; def s24
210210
; GFX906-NEXT: ;;#ASMEND
211-
; GFX906-NEXT: v_writelane_b32 v39, s24, 27
211+
; GFX906-NEXT: v_writelane_b32 v39, s24, 15
212212
; GFX906-NEXT: ;;#ASMSTART
213213
; GFX906-NEXT: ; def s25
214214
; GFX906-NEXT: ;;#ASMEND
215-
; GFX906-NEXT: v_writelane_b32 v39, s25, 28
215+
; GFX906-NEXT: v_writelane_b32 v39, s25, 16
216216
; GFX906-NEXT: ;;#ASMSTART
217217
; GFX906-NEXT: ; def s26
218218
; GFX906-NEXT: ;;#ASMEND
219-
; GFX906-NEXT: v_writelane_b32 v39, s26, 29
219+
; GFX906-NEXT: v_writelane_b32 v39, s26, 17
220220
; GFX906-NEXT: ;;#ASMSTART
221221
; GFX906-NEXT: ; def s27
222222
; GFX906-NEXT: ;;#ASMEND
223-
; GFX906-NEXT: v_writelane_b32 v39, s27, 30
223+
; GFX906-NEXT: v_writelane_b32 v39, s27, 18
224224
; GFX906-NEXT: ;;#ASMSTART
225225
; GFX906-NEXT: ; def s28
226226
; GFX906-NEXT: ;;#ASMEND
227-
; GFX906-NEXT: v_writelane_b32 v39, s28, 31
227+
; GFX906-NEXT: v_writelane_b32 v39, s28, 19
228228
; GFX906-NEXT: ;;#ASMSTART
229229
; GFX906-NEXT: ; def s29
230230
; GFX906-NEXT: ;;#ASMEND
231-
; GFX906-NEXT: v_writelane_b32 v39, s29, 32
231+
; GFX906-NEXT: v_writelane_b32 v39, s29, 20
232232
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
233233
; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
234234
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
@@ -267,39 +267,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
267267
; GFX906-NEXT: v_readlane_b32 s15, v39, 2
268268
; GFX906-NEXT: v_mov_b32_e32 v31, v40
269269
; GFX906-NEXT: v_readlane_b32 s17, v39, 23
270-
; GFX906-NEXT: v_readlane_b32 s21, v39, 24
270+
; GFX906-NEXT: v_readlane_b32 s21, v39, 12
271271
; GFX906-NEXT: ;;#ASMSTART
272272
; GFX906-NEXT: ; use s21
273273
; GFX906-NEXT: ;;#ASMEND
274-
; GFX906-NEXT: v_readlane_b32 s22, v39, 25
274+
; GFX906-NEXT: v_readlane_b32 s22, v39, 13
275275
; GFX906-NEXT: ;;#ASMSTART
276276
; GFX906-NEXT: ; use s22
277277
; GFX906-NEXT: ;;#ASMEND
278-
; GFX906-NEXT: v_readlane_b32 s23, v39, 26
278+
; GFX906-NEXT: v_readlane_b32 s23, v39, 14
279279
; GFX906-NEXT: ;;#ASMSTART
280280
; GFX906-NEXT: ; use s23
281281
; GFX906-NEXT: ;;#ASMEND
282-
; GFX906-NEXT: v_readlane_b32 s24, v39, 27
282+
; GFX906-NEXT: v_readlane_b32 s24, v39, 15
283283
; GFX906-NEXT: ;;#ASMSTART
284284
; GFX906-NEXT: ; use s24
285285
; GFX906-NEXT: ;;#ASMEND
286-
; GFX906-NEXT: v_readlane_b32 s25, v39, 28
286+
; GFX906-NEXT: v_readlane_b32 s25, v39, 16
287287
; GFX906-NEXT: ;;#ASMSTART
288288
; GFX906-NEXT: ; use s25
289289
; GFX906-NEXT: ;;#ASMEND
290-
; GFX906-NEXT: v_readlane_b32 s26, v39, 29
290+
; GFX906-NEXT: v_readlane_b32 s26, v39, 17
291291
; GFX906-NEXT: ;;#ASMSTART
292292
; GFX906-NEXT: ; use s26
293293
; GFX906-NEXT: ;;#ASMEND
294-
; GFX906-NEXT: v_readlane_b32 s27, v39, 30
294+
; GFX906-NEXT: v_readlane_b32 s27, v39, 18
295295
; GFX906-NEXT: ;;#ASMSTART
296296
; GFX906-NEXT: ; use s27
297297
; GFX906-NEXT: ;;#ASMEND
298-
; GFX906-NEXT: v_readlane_b32 s28, v39, 31
298+
; GFX906-NEXT: v_readlane_b32 s28, v39, 19
299299
; GFX906-NEXT: ;;#ASMSTART
300300
; GFX906-NEXT: ; use s28
301301
; GFX906-NEXT: ;;#ASMEND
302-
; GFX906-NEXT: v_readlane_b32 s29, v39, 32
302+
; GFX906-NEXT: v_readlane_b32 s29, v39, 20
303303
; GFX906-NEXT: ;;#ASMSTART
304304
; GFX906-NEXT: ; use s29
305305
; GFX906-NEXT: ;;#ASMEND
@@ -575,39 +575,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
575575
; GFX908-NEXT: ;;#ASMSTART
576576
; GFX908-NEXT: ; def s21
577577
; GFX908-NEXT: ;;#ASMEND
578-
; GFX908-NEXT: v_writelane_b32 v39, s21, 24
578+
; GFX908-NEXT: v_writelane_b32 v39, s21, 12
579579
; GFX908-NEXT: ;;#ASMSTART
580580
; GFX908-NEXT: ; def s22
581581
; GFX908-NEXT: ;;#ASMEND
582-
; GFX908-NEXT: v_writelane_b32 v39, s22, 25
582+
; GFX908-NEXT: v_writelane_b32 v39, s22, 13
583583
; GFX908-NEXT: ;;#ASMSTART
584584
; GFX908-NEXT: ; def s23
585585
; GFX908-NEXT: ;;#ASMEND
586-
; GFX908-NEXT: v_writelane_b32 v39, s23, 26
586+
; GFX908-NEXT: v_writelane_b32 v39, s23, 14
587587
; GFX908-NEXT: ;;#ASMSTART
588588
; GFX908-NEXT: ; def s24
589589
; GFX908-NEXT: ;;#ASMEND
590-
; GFX908-NEXT: v_writelane_b32 v39, s24, 27
590+
; GFX908-NEXT: v_writelane_b32 v39, s24, 15
591591
; GFX908-NEXT: ;;#ASMSTART
592592
; GFX908-NEXT: ; def s25
593593
; GFX908-NEXT: ;;#ASMEND
594-
; GFX908-NEXT: v_writelane_b32 v39, s25, 28
594+
; GFX908-NEXT: v_writelane_b32 v39, s25, 16
595595
; GFX908-NEXT: ;;#ASMSTART
596596
; GFX908-NEXT: ; def s26
597597
; GFX908-NEXT: ;;#ASMEND
598-
; GFX908-NEXT: v_writelane_b32 v39, s26, 29
598+
; GFX908-NEXT: v_writelane_b32 v39, s26, 17
599599
; GFX908-NEXT: ;;#ASMSTART
600600
; GFX908-NEXT: ; def s27
601601
; GFX908-NEXT: ;;#ASMEND
602-
; GFX908-NEXT: v_writelane_b32 v39, s27, 30
602+
; GFX908-NEXT: v_writelane_b32 v39, s27, 18
603603
; GFX908-NEXT: ;;#ASMSTART
604604
; GFX908-NEXT: ; def s28
605605
; GFX908-NEXT: ;;#ASMEND
606-
; GFX908-NEXT: v_writelane_b32 v39, s28, 31
606+
; GFX908-NEXT: v_writelane_b32 v39, s28, 19
607607
; GFX908-NEXT: ;;#ASMSTART
608608
; GFX908-NEXT: ; def s29
609609
; GFX908-NEXT: ;;#ASMEND
610-
; GFX908-NEXT: v_writelane_b32 v39, s29, 32
610+
; GFX908-NEXT: v_writelane_b32 v39, s29, 20
611611
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
612612
; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
613613
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
@@ -646,39 +646,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
646646
; GFX908-NEXT: v_readlane_b32 s15, v39, 2
647647
; GFX908-NEXT: v_mov_b32_e32 v31, v40
648648
; GFX908-NEXT: v_readlane_b32 s17, v39, 23
649-
; GFX908-NEXT: v_readlane_b32 s21, v39, 24
649+
; GFX908-NEXT: v_readlane_b32 s21, v39, 12
650650
; GFX908-NEXT: ;;#ASMSTART
651651
; GFX908-NEXT: ; use s21
652652
; GFX908-NEXT: ;;#ASMEND
653-
; GFX908-NEXT: v_readlane_b32 s22, v39, 25
653+
; GFX908-NEXT: v_readlane_b32 s22, v39, 13
654654
; GFX908-NEXT: ;;#ASMSTART
655655
; GFX908-NEXT: ; use s22
656656
; GFX908-NEXT: ;;#ASMEND
657-
; GFX908-NEXT: v_readlane_b32 s23, v39, 26
657+
; GFX908-NEXT: v_readlane_b32 s23, v39, 14
658658
; GFX908-NEXT: ;;#ASMSTART
659659
; GFX908-NEXT: ; use s23
660660
; GFX908-NEXT: ;;#ASMEND
661-
; GFX908-NEXT: v_readlane_b32 s24, v39, 27
661+
; GFX908-NEXT: v_readlane_b32 s24, v39, 15
662662
; GFX908-NEXT: ;;#ASMSTART
663663
; GFX908-NEXT: ; use s24
664664
; GFX908-NEXT: ;;#ASMEND
665-
; GFX908-NEXT: v_readlane_b32 s25, v39, 28
665+
; GFX908-NEXT: v_readlane_b32 s25, v39, 16
666666
; GFX908-NEXT: ;;#ASMSTART
667667
; GFX908-NEXT: ; use s25
668668
; GFX908-NEXT: ;;#ASMEND
669-
; GFX908-NEXT: v_readlane_b32 s26, v39, 29
669+
; GFX908-NEXT: v_readlane_b32 s26, v39, 17
670670
; GFX908-NEXT: ;;#ASMSTART
671671
; GFX908-NEXT: ; use s26
672672
; GFX908-NEXT: ;;#ASMEND
673-
; GFX908-NEXT: v_readlane_b32 s27, v39, 30
673+
; GFX908-NEXT: v_readlane_b32 s27, v39, 18
674674
; GFX908-NEXT: ;;#ASMSTART
675675
; GFX908-NEXT: ; use s27
676676
; GFX908-NEXT: ;;#ASMEND
677-
; GFX908-NEXT: v_readlane_b32 s28, v39, 31
677+
; GFX908-NEXT: v_readlane_b32 s28, v39, 19
678678
; GFX908-NEXT: ;;#ASMSTART
679679
; GFX908-NEXT: ; use s28
680680
; GFX908-NEXT: ;;#ASMEND
681-
; GFX908-NEXT: v_readlane_b32 s29, v39, 32
681+
; GFX908-NEXT: v_readlane_b32 s29, v39, 20
682682
; GFX908-NEXT: ;;#ASMSTART
683683
; GFX908-NEXT: ; use s29
684684
; GFX908-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
; DEFAULT: Greedy Register Allocator
1919
; DEFAULT-NEXT: Virtual Register Rewriter
20+
; DEFAULT-NEXT: Stack Slot Coloring
2021
; DEFAULT-NEXT: SI lower SGPR spill instructions
2122
; DEFAULT-NEXT: Virtual Register Map
2223
; DEFAULT-NEXT: Live Register Matrix
2324
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
25+
; DEFAULT-NEXT: Live Stack Slot Analysis
2426
; DEFAULT-NEXT: Greedy Register Allocator
2527
; DEFAULT-NEXT: SI Lower WWM Copies
2628
; DEFAULT-NEXT: Virtual Register Rewriter
@@ -57,10 +59,12 @@
5759
; BASIC-DEFAULT-NEXT: Live Register Matrix
5860
; BASIC-DEFAULT-NEXT: Basic Register Allocator
5961
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
62+
; BASIC-DEFAULT-NEXT: Stack Slot Coloring
6063
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
6164
; BASIC-DEFAULT-NEXT: Virtual Register Map
6265
; BASIC-DEFAULT-NEXT: Live Register Matrix
6366
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
67+
; BASIC-DEFAULT-NEXT: Live Stack Slot Analysis
6468
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
6569
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
6670
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
@@ -81,10 +85,12 @@
8185

8286
; DEFAULT-BASIC: Greedy Register Allocator
8387
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
88+
; DEFAULT-BASIC-NEXT: Stack Slot Coloring
8489
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
8590
; DEFAULT-BASIC-NEXT: Virtual Register Map
8691
; DEFAULT-BASIC-NEXT: Live Register Matrix
8792
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
93+
; DEFAULT-BASIC-NEXT: Live Stack Slot Analysis
8894
; DEFAULT-BASIC-NEXT: Basic Register Allocator
8995
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
9096
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
@@ -107,10 +113,12 @@
107113
; BASIC-BASIC-NEXT: Live Register Matrix
108114
; BASIC-BASIC-NEXT: Basic Register Allocator
109115
; BASIC-BASIC-NEXT: Virtual Register Rewriter
116+
; BASIC-BASIC-NEXT: Stack Slot Coloring
110117
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
111118
; BASIC-BASIC-NEXT: Virtual Register Map
112119
; BASIC-BASIC-NEXT: Live Register Matrix
113120
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
121+
; BASIC-BASIC-NEXT: Live Stack Slot Analysis
114122
; BASIC-BASIC-NEXT: Basic Register Allocator
115123
; BASIC-BASIC-NEXT: SI Lower WWM Copies
116124
; BASIC-BASIC-NEXT: Virtual Register Rewriter

0 commit comments

Comments
 (0)