Skip to content

Commit 7e610af

Browse files
committed
[WIP] Implemented a patch to optimize SGPR spills.
Introduced the StackSlotColoring pass after SGPR RegAlloc and Spill to improve stack slot reuse.
1 parent 3ce9b86 commit 7e610af

11 files changed

+507
-84
lines changed

llvm/lib/CodeGen/StackSlotColoring.cpp

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/ADT/BitVector.h"
1414
#include "llvm/ADT/SmallVector.h"
1515
#include "llvm/ADT/Statistic.h"
16+
#include "llvm/CodeGen/LiveDebugVariables.h"
1617
#include "llvm/CodeGen/LiveInterval.h"
1718
#include "llvm/CodeGen/LiveIntervalUnion.h"
1819
#include "llvm/CodeGen/LiveIntervals.h"
@@ -64,6 +65,7 @@ namespace {
6465
MachineFrameInfo *MFI = nullptr;
6566
const TargetInstrInfo *TII = nullptr;
6667
const MachineBlockFrequencyInfo *MBFI = nullptr;
68+
SlotIndexes *Indexes = nullptr;
6769

6870
// SSIntervals - Spill slot intervals.
6971
std::vector<LiveInterval*> SSIntervals;
@@ -152,6 +154,14 @@ namespace {
152154
AU.addRequired<MachineBlockFrequencyInfo>();
153155
AU.addPreserved<MachineBlockFrequencyInfo>();
154156
AU.addPreservedID(MachineDominatorsID);
157+
158+
/// NOTE: As the AMDGPU pass pipeline splits reg alloc into 2 phases
159+
/// and StackSlotColoring is invoked after each phase, it becomes
160+
/// important to preserve additional analysis results to be used by VGPR
161+
/// regAlloc, after being done with SGPR regAlloc and its related passes.
162+
AU.addPreserved<LiveIntervals>();
163+
AU.addPreserved<LiveDebugVariables>();
164+
155165
MachineFunctionPass::getAnalysisUsage(AU);
156166
}
157167

@@ -496,8 +506,13 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
496506
++I;
497507
}
498508

499-
for (MachineInstr *MI : toErase)
509+
/// FIXED: As this pass preserves SlotIndexesAnalysis result, any
510+
/// addition/removal of MI needs corresponding update in SlotIndexAnalysis,
511+
/// to avoid corruption of SlotIndexesAnalysis result.
512+
for (MachineInstr *MI : toErase) {
500513
MI->eraseFromParent();
514+
Indexes->removeMachineInstrFromMaps(*MI);
515+
}
501516

502517
return changed;
503518
}
@@ -515,6 +530,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
515530
TII = MF.getSubtarget().getInstrInfo();
516531
LS = &getAnalysis<LiveStacks>();
517532
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
533+
Indexes = &getAnalysis<SlotIndexes>();
518534

519535
bool Changed = false;
520536

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,6 +1406,9 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14061406
// since FastRegAlloc does the replacements itself.
14071407
addPass(createVirtRegRewriter(false));
14081408

1409+
// Optimizes SGPR spills into VGPR lanes for non-interfering spill ranges.
1410+
addPass(&StackSlotColoringID);
1411+
14091412
// Equivalent of PEI for SGPRs.
14101413
addPass(&SILowerSGPRSpillsID);
14111414
addPass(&SIPreAllocateWWMRegsID);

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ class SILowerSGPRSpills : public MachineFunctionPass {
5252
void calculateSaveRestoreBlocks(MachineFunction &MF);
5353
bool spillCalleeSavedRegs(MachineFunction &MF,
5454
SmallVectorImpl<int> &CalleeSavedFIs);
55-
void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
55+
void extendWWMVirtRegLiveness(MachineFunction &MF, SlotIndexes *Indexes,
56+
LiveIntervals *LIS);
5657

5758
bool runOnMachineFunction(MachineFunction &MF) override;
5859

@@ -260,6 +261,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
260261
}
261262

262263
void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
264+
SlotIndexes *Indexes,
263265
LiveIntervals *LIS) {
264266
// TODO: This is a workaround to avoid the unmodelled liveness computed with
265267
// whole-wave virtual registers when allocated together with the regular VGPR
@@ -278,14 +280,21 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
278280
for (auto Reg : MFI->getSGPRSpillVGPRs()) {
279281
for (MachineBasicBlock *SaveBlock : SaveBlocks) {
280282
MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
283+
MachineInstrSpan MIS(InsertBefore, SaveBlock);
284+
281285
DebugLoc DL = SaveBlock->findDebugLoc(InsertBefore);
282286
auto MIB = BuildMI(*SaveBlock, InsertBefore, DL,
283287
TII->get(AMDGPU::IMPLICIT_DEF), Reg);
284288
MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
285289
// Set SGPR_SPILL asm printer flag
286290
MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
291+
287292
if (LIS) {
288293
LIS->InsertMachineInstrInMaps(*MIB);
294+
} else if (Indexes) {
295+
assert(std::distance(MIS.begin(), InsertBefore) == 1);
296+
MachineInstr &Inst = *std::prev(InsertBefore);
297+
Indexes->insertMachineInstrInMaps(Inst);
289298
}
290299
}
291300
}
@@ -300,8 +309,13 @@ void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
300309
auto MIB = BuildMI(*RestoreBlock, InsertBefore, DL,
301310
TII->get(TargetOpcode::KILL));
302311
MIB.addReg(Reg);
303-
if (LIS)
312+
313+
if (LIS) {
304314
LIS->InsertMachineInstrInMaps(*MIB);
315+
} else if (Indexes) {
316+
MachineInstr &Inst = *std::prev(InsertBefore);
317+
Indexes->insertMachineInstrInMaps(Inst);
318+
}
305319
}
306320
}
307321
}
@@ -392,7 +406,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
392406
}
393407

394408
if (SpilledToVirtVGPRLanes) {
395-
extendWWMVirtRegLiveness(MF, LIS);
409+
extendWWMVirtRegLiveness(MF, Indexes, LIS);
396410
if (LIS) {
397411
// Compute the LiveInterval for the newly created virtual registers.
398412
for (auto Reg : FuncInfo->getSGPRSpillVGPRs())

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,8 +1775,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
17751775

17761776
if (SpillToVGPR) {
17771777

1778-
assert(SB.NumSubRegs == VGPRSpills.size() &&
1779-
"Num of VGPR lanes should be equal to num of SGPRs spilled");
1778+
assert(SB.NumSubRegs <= VGPRSpills.size() &&
1779+
"Num of VGPR lanes should be greater or equal to num of SGPRs "
1780+
"spilled, as Stack Slot Coloring pass assigns different SGPR spills "
1781+
"into same stack slots");
17801782

17811783
for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
17821784
Register SubReg =

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,10 +362,12 @@
362362
; GCN-O1-NEXT: Machine Optimization Remark Emitter
363363
; GCN-O1-NEXT: Greedy Register Allocator
364364
; GCN-O1-NEXT: Virtual Register Rewriter
365+
; GCN-O1-NEXT: Stack Slot Coloring
365366
; GCN-O1-NEXT: SI lower SGPR spill instructions
366367
; GCN-O1-NEXT: Virtual Register Map
367368
; GCN-O1-NEXT: Live Register Matrix
368369
; GCN-O1-NEXT: SI Pre-allocate WWM Registers
370+
; GCN-O1-NEXT: Live Stack Slot Analysis
369371
; GCN-O1-NEXT: Greedy Register Allocator
370372
; GCN-O1-NEXT: SI Lower WWM Copies
371373
; GCN-O1-NEXT: GCN NSA Reassign
@@ -665,10 +667,12 @@
665667
; GCN-O1-OPTS-NEXT: Machine Optimization Remark Emitter
666668
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
667669
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
670+
; GCN-O1-OPTS-NEXT: Stack Slot Coloring
668671
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
669672
; GCN-O1-OPTS-NEXT: Virtual Register Map
670673
; GCN-O1-OPTS-NEXT: Live Register Matrix
671674
; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers
675+
; GCN-O1-OPTS-NEXT: Live Stack Slot Analysis
672676
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
673677
; GCN-O1-OPTS-NEXT: SI Lower WWM Copies
674678
; GCN-O1-OPTS-NEXT: GCN NSA Reassign
@@ -974,10 +978,12 @@
974978
; GCN-O2-NEXT: Machine Optimization Remark Emitter
975979
; GCN-O2-NEXT: Greedy Register Allocator
976980
; GCN-O2-NEXT: Virtual Register Rewriter
981+
; GCN-O2-NEXT: Stack Slot Coloring
977982
; GCN-O2-NEXT: SI lower SGPR spill instructions
978983
; GCN-O2-NEXT: Virtual Register Map
979984
; GCN-O2-NEXT: Live Register Matrix
980985
; GCN-O2-NEXT: SI Pre-allocate WWM Registers
986+
; GCN-O2-NEXT: Live Stack Slot Analysis
981987
; GCN-O2-NEXT: Greedy Register Allocator
982988
; GCN-O2-NEXT: SI Lower WWM Copies
983989
; GCN-O2-NEXT: GCN NSA Reassign
@@ -1295,10 +1301,12 @@
12951301
; GCN-O3-NEXT: Machine Optimization Remark Emitter
12961302
; GCN-O3-NEXT: Greedy Register Allocator
12971303
; GCN-O3-NEXT: Virtual Register Rewriter
1304+
; GCN-O3-NEXT: Stack Slot Coloring
12981305
; GCN-O3-NEXT: SI lower SGPR spill instructions
12991306
; GCN-O3-NEXT: Virtual Register Map
13001307
; GCN-O3-NEXT: Live Register Matrix
13011308
; GCN-O3-NEXT: SI Pre-allocate WWM Registers
1309+
; GCN-O3-NEXT: Live Stack Slot Analysis
13021310
; GCN-O3-NEXT: Greedy Register Allocator
13031311
; GCN-O3-NEXT: SI Lower WWM Copies
13041312
; GCN-O3-NEXT: GCN NSA Reassign

llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -221,15 +221,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
221221
; GFX906-NEXT: ; def s29
222222
; GFX906-NEXT: ;;#ASMEND
223223
; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
224-
; GFX906-NEXT: v_writelane_b32 v40, s21, 24
225-
; GFX906-NEXT: v_writelane_b32 v40, s22, 25
226-
; GFX906-NEXT: v_writelane_b32 v40, s23, 26
227-
; GFX906-NEXT: v_writelane_b32 v40, s24, 27
228-
; GFX906-NEXT: v_writelane_b32 v40, s25, 28
229-
; GFX906-NEXT: v_writelane_b32 v40, s26, 29
230-
; GFX906-NEXT: v_writelane_b32 v40, s27, 30
231-
; GFX906-NEXT: v_writelane_b32 v40, s28, 31
232-
; GFX906-NEXT: v_writelane_b32 v40, s29, 32
224+
; GFX906-NEXT: v_writelane_b32 v40, s21, 12
225+
; GFX906-NEXT: v_writelane_b32 v40, s22, 13
226+
; GFX906-NEXT: v_writelane_b32 v40, s23, 14
227+
; GFX906-NEXT: v_writelane_b32 v40, s24, 15
228+
; GFX906-NEXT: v_writelane_b32 v40, s25, 16
229+
; GFX906-NEXT: v_writelane_b32 v40, s26, 17
230+
; GFX906-NEXT: v_writelane_b32 v40, s27, 18
231+
; GFX906-NEXT: v_writelane_b32 v40, s28, 19
232+
; GFX906-NEXT: v_writelane_b32 v40, s29, 20
233233
; GFX906-NEXT: v_readlane_b32 s4, v40, 10
234234
; GFX906-NEXT: v_readlane_b32 s6, v40, 0
235235
; GFX906-NEXT: v_readlane_b32 s8, v40, 8
@@ -249,39 +249,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
249249
; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17]
250250
; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1
251251
; GFX906-NEXT: s_mov_b64 exec, s[34:35]
252-
; GFX906-NEXT: v_readlane_b32 s21, v40, 24
252+
; GFX906-NEXT: v_readlane_b32 s21, v40, 12
253253
; GFX906-NEXT: ;;#ASMSTART
254254
; GFX906-NEXT: ; use s21
255255
; GFX906-NEXT: ;;#ASMEND
256-
; GFX906-NEXT: v_readlane_b32 s22, v40, 25
256+
; GFX906-NEXT: v_readlane_b32 s22, v40, 13
257257
; GFX906-NEXT: ;;#ASMSTART
258258
; GFX906-NEXT: ; use s22
259259
; GFX906-NEXT: ;;#ASMEND
260-
; GFX906-NEXT: v_readlane_b32 s23, v40, 26
260+
; GFX906-NEXT: v_readlane_b32 s23, v40, 14
261261
; GFX906-NEXT: ;;#ASMSTART
262262
; GFX906-NEXT: ; use s23
263263
; GFX906-NEXT: ;;#ASMEND
264-
; GFX906-NEXT: v_readlane_b32 s24, v40, 27
264+
; GFX906-NEXT: v_readlane_b32 s24, v40, 15
265265
; GFX906-NEXT: ;;#ASMSTART
266266
; GFX906-NEXT: ; use s24
267267
; GFX906-NEXT: ;;#ASMEND
268-
; GFX906-NEXT: v_readlane_b32 s25, v40, 28
268+
; GFX906-NEXT: v_readlane_b32 s25, v40, 16
269269
; GFX906-NEXT: ;;#ASMSTART
270270
; GFX906-NEXT: ; use s25
271271
; GFX906-NEXT: ;;#ASMEND
272-
; GFX906-NEXT: v_readlane_b32 s26, v40, 29
272+
; GFX906-NEXT: v_readlane_b32 s26, v40, 17
273273
; GFX906-NEXT: ;;#ASMSTART
274274
; GFX906-NEXT: ; use s26
275275
; GFX906-NEXT: ;;#ASMEND
276-
; GFX906-NEXT: v_readlane_b32 s27, v40, 30
276+
; GFX906-NEXT: v_readlane_b32 s27, v40, 18
277277
; GFX906-NEXT: ;;#ASMSTART
278278
; GFX906-NEXT: ; use s27
279279
; GFX906-NEXT: ;;#ASMEND
280-
; GFX906-NEXT: v_readlane_b32 s28, v40, 31
280+
; GFX906-NEXT: v_readlane_b32 s28, v40, 19
281281
; GFX906-NEXT: ;;#ASMSTART
282282
; GFX906-NEXT: ; use s28
283283
; GFX906-NEXT: ;;#ASMEND
284-
; GFX906-NEXT: v_readlane_b32 s29, v40, 32
284+
; GFX906-NEXT: v_readlane_b32 s29, v40, 20
285285
; GFX906-NEXT: ;;#ASMSTART
286286
; GFX906-NEXT: ; use s29
287287
; GFX906-NEXT: ;;#ASMEND
@@ -602,15 +602,15 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
602602
; GFX908-NEXT: ; def s29
603603
; GFX908-NEXT: ;;#ASMEND
604604
; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
605-
; GFX908-NEXT: v_writelane_b32 v40, s21, 24
606-
; GFX908-NEXT: v_writelane_b32 v40, s22, 25
607-
; GFX908-NEXT: v_writelane_b32 v40, s23, 26
608-
; GFX908-NEXT: v_writelane_b32 v40, s24, 27
609-
; GFX908-NEXT: v_writelane_b32 v40, s25, 28
610-
; GFX908-NEXT: v_writelane_b32 v40, s26, 29
611-
; GFX908-NEXT: v_writelane_b32 v40, s27, 30
612-
; GFX908-NEXT: v_writelane_b32 v40, s28, 31
613-
; GFX908-NEXT: v_writelane_b32 v40, s29, 32
605+
; GFX908-NEXT: v_writelane_b32 v40, s21, 12
606+
; GFX908-NEXT: v_writelane_b32 v40, s22, 13
607+
; GFX908-NEXT: v_writelane_b32 v40, s23, 14
608+
; GFX908-NEXT: v_writelane_b32 v40, s24, 15
609+
; GFX908-NEXT: v_writelane_b32 v40, s25, 16
610+
; GFX908-NEXT: v_writelane_b32 v40, s26, 17
611+
; GFX908-NEXT: v_writelane_b32 v40, s27, 18
612+
; GFX908-NEXT: v_writelane_b32 v40, s28, 19
613+
; GFX908-NEXT: v_writelane_b32 v40, s29, 20
614614
; GFX908-NEXT: v_readlane_b32 s4, v40, 10
615615
; GFX908-NEXT: v_readlane_b32 s6, v40, 0
616616
; GFX908-NEXT: v_readlane_b32 s8, v40, 8
@@ -630,39 +630,39 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 {
630630
; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17]
631631
; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1
632632
; GFX908-NEXT: s_mov_b64 exec, s[34:35]
633-
; GFX908-NEXT: v_readlane_b32 s21, v40, 24
633+
; GFX908-NEXT: v_readlane_b32 s21, v40, 12
634634
; GFX908-NEXT: ;;#ASMSTART
635635
; GFX908-NEXT: ; use s21
636636
; GFX908-NEXT: ;;#ASMEND
637-
; GFX908-NEXT: v_readlane_b32 s22, v40, 25
637+
; GFX908-NEXT: v_readlane_b32 s22, v40, 13
638638
; GFX908-NEXT: ;;#ASMSTART
639639
; GFX908-NEXT: ; use s22
640640
; GFX908-NEXT: ;;#ASMEND
641-
; GFX908-NEXT: v_readlane_b32 s23, v40, 26
641+
; GFX908-NEXT: v_readlane_b32 s23, v40, 14
642642
; GFX908-NEXT: ;;#ASMSTART
643643
; GFX908-NEXT: ; use s23
644644
; GFX908-NEXT: ;;#ASMEND
645-
; GFX908-NEXT: v_readlane_b32 s24, v40, 27
645+
; GFX908-NEXT: v_readlane_b32 s24, v40, 15
646646
; GFX908-NEXT: ;;#ASMSTART
647647
; GFX908-NEXT: ; use s24
648648
; GFX908-NEXT: ;;#ASMEND
649-
; GFX908-NEXT: v_readlane_b32 s25, v40, 28
649+
; GFX908-NEXT: v_readlane_b32 s25, v40, 16
650650
; GFX908-NEXT: ;;#ASMSTART
651651
; GFX908-NEXT: ; use s25
652652
; GFX908-NEXT: ;;#ASMEND
653-
; GFX908-NEXT: v_readlane_b32 s26, v40, 29
653+
; GFX908-NEXT: v_readlane_b32 s26, v40, 17
654654
; GFX908-NEXT: ;;#ASMSTART
655655
; GFX908-NEXT: ; use s26
656656
; GFX908-NEXT: ;;#ASMEND
657-
; GFX908-NEXT: v_readlane_b32 s27, v40, 30
657+
; GFX908-NEXT: v_readlane_b32 s27, v40, 18
658658
; GFX908-NEXT: ;;#ASMSTART
659659
; GFX908-NEXT: ; use s27
660660
; GFX908-NEXT: ;;#ASMEND
661-
; GFX908-NEXT: v_readlane_b32 s28, v40, 31
661+
; GFX908-NEXT: v_readlane_b32 s28, v40, 19
662662
; GFX908-NEXT: ;;#ASMSTART
663663
; GFX908-NEXT: ; use s28
664664
; GFX908-NEXT: ;;#ASMEND
665-
; GFX908-NEXT: v_readlane_b32 s29, v40, 32
665+
; GFX908-NEXT: v_readlane_b32 s29, v40, 20
666666
; GFX908-NEXT: ;;#ASMSTART
667667
; GFX908-NEXT: ; use s29
668668
; GFX908-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717

1818
; DEFAULT: Greedy Register Allocator
1919
; DEFAULT-NEXT: Virtual Register Rewriter
20+
; DEFAULT-NEXT: Stack Slot Coloring
2021
; DEFAULT-NEXT: SI lower SGPR spill instructions
2122
; DEFAULT-NEXT: Virtual Register Map
2223
; DEFAULT-NEXT: Live Register Matrix
2324
; DEFAULT-NEXT: SI Pre-allocate WWM Registers
25+
; DEFAULT-NEXT: Live Stack Slot Analysis
2426
; DEFAULT-NEXT: Greedy Register Allocator
2527
; DEFAULT-NEXT: SI Lower WWM Copies
2628
; DEFAULT-NEXT: GCN NSA Reassign
@@ -50,10 +52,12 @@
5052
; BASIC-DEFAULT-NEXT: Live Register Matrix
5153
; BASIC-DEFAULT-NEXT: Basic Register Allocator
5254
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
55+
; BASIC-DEFAULT-NEXT: Stack Slot Coloring
5356
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
5457
; BASIC-DEFAULT-NEXT: Virtual Register Map
5558
; BASIC-DEFAULT-NEXT: Live Register Matrix
5659
; BASIC-DEFAULT-NEXT: SI Pre-allocate WWM Registers
60+
; BASIC-DEFAULT-NEXT: Live Stack Slot Analysis
5761
; BASIC-DEFAULT-NEXT: Bundle Machine CFG Edges
5862
; BASIC-DEFAULT-NEXT: Spill Code Placement Analysis
5963
; BASIC-DEFAULT-NEXT: Lazy Machine Block Frequency Analysis
@@ -69,10 +73,12 @@
6973

7074
; DEFAULT-BASIC: Greedy Register Allocator
7175
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
76+
; DEFAULT-BASIC-NEXT: Stack Slot Coloring
7277
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
7378
; DEFAULT-BASIC-NEXT: Virtual Register Map
7479
; DEFAULT-BASIC-NEXT: Live Register Matrix
7580
; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers
81+
; DEFAULT-BASIC-NEXT: Live Stack Slot Analysis
7682
; DEFAULT-BASIC-NEXT: Basic Register Allocator
7783
; DEFAULT-BASIC-NEXT: SI Lower WWM Copies
7884
; DEFAULT-BASIC-NEXT: GCN NSA Reassign
@@ -90,10 +96,12 @@
9096
; BASIC-BASIC-NEXT: Live Register Matrix
9197
; BASIC-BASIC-NEXT: Basic Register Allocator
9298
; BASIC-BASIC-NEXT: Virtual Register Rewriter
99+
; BASIC-BASIC-NEXT: Stack Slot Coloring
93100
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
94101
; BASIC-BASIC-NEXT: Virtual Register Map
95102
; BASIC-BASIC-NEXT: Live Register Matrix
96103
; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers
104+
; BASIC-BASIC-NEXT: Live Stack Slot Analysis
97105
; BASIC-BASIC-NEXT: Basic Register Allocator
98106
; BASIC-BASIC-NEXT: SI Lower WWM Copies
99107
; BASIC-BASIC-NEXT: GCN NSA Reassign

0 commit comments

Comments
 (0)