Skip to content

Commit b09b9ac

Browse files
authored
[llvm][CodeGen] Fix the empty interval issue in Window Scheduler (#129204)
The interval of newly generated reg in ModuloScheduleExpander is empty. This will cause crash at some corner case. This patch recalculate the live intervals of these regs.
1 parent e0fee65 commit b09b9ac

File tree

3 files changed

+215
-21
lines changed

3 files changed

+215
-21
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ class ModuloScheduleExpander {
188188
/// Instructions to change when emitting the final schedule.
189189
InstrChangesTy InstrChanges;
190190

191+
/// Record the registers that need to compute live intervals.
192+
SmallVector<Register> NoIntervalRegs;
193+
191194
void generatePipelinedLoop();
192195
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
193196
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -211,6 +214,7 @@ class ModuloScheduleExpander {
211214
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
212215
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
213216
ValueMapTy *VRMap);
217+
void calculateIntervals();
214218
bool computeDelta(MachineInstr &MI, unsigned &Delta);
215219
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
216220
unsigned Num);

llvm/lib/CodeGen/ModuloSchedule.cpp

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
141141
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142142
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143143
KernelBB->push_back(NewMI);
144+
LIS.InsertMachineInstrInMaps(*NewMI);
144145
InstrMap[NewMI] = CI;
145146
}
146147

@@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
150151
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
151152
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
152153
KernelBB->push_back(NewMI);
154+
LIS.InsertMachineInstrInMaps(*NewMI);
153155
InstrMap[NewMI] = &MI;
154156
}
155157

@@ -179,6 +181,10 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
179181
// Add branches between prolog and epilog blocks.
180182
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181183

184+
// The intervals of newly created virtual registers are calculated after the
185+
// kernel expansion.
186+
calculateIntervals();
187+
182188
delete[] VRMap;
183189
delete[] VRMapPhi;
184190
}
@@ -226,6 +232,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
226232
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227233
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
228234
NewBB->push_back(NewMI);
235+
LIS.InsertMachineInstrInMaps(*NewMI);
229236
InstrMap[NewMI] = &*BBI;
230237
}
231238
}
@@ -303,6 +310,7 @@ void ModuloScheduleExpander::generateEpilog(
303310
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304311
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
305312
NewBB->push_back(NewMI);
313+
LIS.InsertMachineInstrInMaps(*NewMI);
306314
InstrMap[NewMI] = In;
307315
}
308316
}
@@ -343,14 +351,11 @@ void ModuloScheduleExpander::generateEpilog(
343351
/// basic block with ToReg.
344352
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
345353
MachineBasicBlock *MBB,
346-
MachineRegisterInfo &MRI,
347-
LiveIntervals &LIS) {
354+
MachineRegisterInfo &MRI) {
348355
for (MachineOperand &O :
349356
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
350357
if (O.getParent()->getParent() != MBB)
351358
O.setReg(ToReg);
352-
if (!LIS.hasInterval(ToReg))
353-
LIS.createEmptyInterval(ToReg);
354359
}
355360

356361
/// Return true if the register has a use that occurs outside the
@@ -544,8 +549,10 @@ void ModuloScheduleExpander::generateExistingPhis(
544549
if (VRMap[LastStageNum - np - 1].count(LoopVal))
545550
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
546551

547-
if (IsLast && np == NumPhis - 1)
548-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
552+
if (IsLast && np == NumPhis - 1) {
553+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
554+
NoIntervalRegs.push_back(NewReg);
555+
}
549556
continue;
550557
}
551558
}
@@ -563,6 +570,7 @@ void ModuloScheduleExpander::generateExistingPhis(
563570
TII->get(TargetOpcode::PHI), NewReg);
564571
NewPhi.addReg(PhiOp1).addMBB(BB1);
565572
NewPhi.addReg(PhiOp2).addMBB(BB2);
573+
LIS.InsertMachineInstrInMaps(*NewPhi);
566574
if (np == 0)
567575
InstrMap[NewPhi] = &*BBI;
568576

@@ -584,8 +592,10 @@ void ModuloScheduleExpander::generateExistingPhis(
584592
// Check if we need to rename any uses that occurs after the loop. The
585593
// register to replace depends on whether the Phi is scheduled in the
586594
// epilog.
587-
if (IsLast && np == NumPhis - 1)
588-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
595+
if (IsLast && np == NumPhis - 1) {
596+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
597+
NoIntervalRegs.push_back(NewReg);
598+
}
589599

590600
// In the kernel, a dependent Phi uses the value from this Phi.
591601
if (InKernel)
@@ -603,9 +613,12 @@ void ModuloScheduleExpander::generateExistingPhis(
603613
// Check if we need to rename a Phi that has been eliminated due to
604614
// scheduling.
605615
if (NumStages == 0 && IsLast) {
606-
auto It = VRMap[CurStageNum].find(LoopVal);
607-
if (It != VRMap[CurStageNum].end())
608-
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
616+
auto &CurStageMap = VRMap[CurStageNum];
617+
auto It = CurStageMap.find(LoopVal);
618+
if (It != CurStageMap.end()) {
619+
replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
620+
NoIntervalRegs.push_back(It->second);
621+
}
609622
}
610623
}
611624
}
@@ -705,6 +718,7 @@ void ModuloScheduleExpander::generatePhis(
705718
TII->get(TargetOpcode::PHI), NewReg);
706719
NewPhi.addReg(PhiOp1).addMBB(BB1);
707720
NewPhi.addReg(PhiOp2).addMBB(BB2);
721+
LIS.InsertMachineInstrInMaps(*NewPhi);
708722
if (np == 0)
709723
InstrMap[NewPhi] = &*BBI;
710724

@@ -724,8 +738,10 @@ void ModuloScheduleExpander::generatePhis(
724738
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
725739
NewReg);
726740
}
727-
if (IsLast && np == NumPhis - 1)
728-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
741+
if (IsLast && np == NumPhis - 1) {
742+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
743+
NoIntervalRegs.push_back(NewReg);
744+
}
729745
}
730746
}
731747
}
@@ -834,9 +850,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
834850
// We split the lifetime when we find the first use.
835851
if (!SplitReg) {
836852
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
837-
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
838-
TII->get(TargetOpcode::COPY), SplitReg)
839-
.addReg(Def);
853+
MachineInstr *newCopy =
854+
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
855+
TII->get(TargetOpcode::COPY), SplitReg)
856+
.addReg(Def);
857+
LIS.InsertMachineInstrInMaps(*newCopy);
840858
}
841859
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
842860
}
@@ -904,13 +922,17 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
904922
removePhis(Epilog, LastEpi);
905923
// Remove the blocks that are no longer referenced.
906924
if (LastPro != LastEpi) {
925+
for (auto &MI : *LastEpi)
926+
LIS.RemoveMachineInstrFromMaps(MI);
907927
LastEpi->clear();
908928
LastEpi->eraseFromParent();
909929
}
910930
if (LastPro == KernelBB) {
911931
LoopInfo->disposed(&LIS);
912932
NewKernel = nullptr;
913933
}
934+
for (auto &MI : *LastPro)
935+
LIS.RemoveMachineInstrFromMaps(MI);
914936
LastPro->clear();
915937
LastPro->eraseFromParent();
916938
} else {
@@ -931,6 +953,14 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
931953
}
932954
}
933955

956+
/// Some registers are generated during the kernel expansion. We calculate the
957+
/// live intervals of these registers after the expansion.
958+
void ModuloScheduleExpander::calculateIntervals() {
959+
for (Register Reg : NoIntervalRegs)
960+
LIS.createAndComputeVirtRegInterval(Reg);
961+
NoIntervalRegs.clear();
962+
}
963+
934964
/// Return true if we can compute the amount the instruction changes
935965
/// during each iteration. Set Delta to the amount of the change.
936966
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1051,8 +1081,10 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10511081
Register NewReg = MRI.createVirtualRegister(RC);
10521082
MO.setReg(NewReg);
10531083
VRMap[CurStageNum][reg] = NewReg;
1054-
if (LastDef)
1055-
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1084+
if (LastDef) {
1085+
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1086+
NoIntervalRegs.push_back(NewReg);
1087+
}
10561088
} else if (MO.isUse()) {
10571089
MachineInstr *Def = MRI.getVRegDef(reg);
10581090
// Compute the stage that contains the last definition for instruction.
@@ -1201,10 +1233,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
12011233
UseOp.setReg(ReplaceReg);
12021234
else {
12031235
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1204-
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1205-
SplitReg)
1206-
.addReg(ReplaceReg);
1236+
MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1237+
TII->get(TargetOpcode::COPY), SplitReg)
1238+
.addReg(ReplaceReg);
12071239
UseOp.setReg(SplitReg);
1240+
LIS.InsertMachineInstrInMaps(*newCopy);
12081241
}
12091242
}
12101243
}
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s
3+
4+
--- |
5+
define void @test_swp_ws_live_intervals(i32 %.pre) {
6+
entry:
7+
%cgep9 = bitcast ptr null to ptr
8+
br label %for.body147
9+
10+
for.body147: ; preds = %for.body170, %entry
11+
%add11.i526 = or i32 %.pre, 1
12+
br label %for.body158
13+
14+
for.body158: ; preds = %for.body158, %for.body147
15+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
16+
%add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
17+
%and8.i534 = and i32 %add11.i536602603, 1
18+
%cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
19+
store i32 0, ptr %cgep7, align 4
20+
%lsr.iv.next = add nsw i32 %lsr.iv, 1
21+
%cmp157.3 = icmp ult i32 %lsr.iv.next, 510
22+
br i1 %cmp157.3, label %for.body158, label %for.body170
23+
24+
for.body170: ; preds = %for.body170, %for.body158
25+
%lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
26+
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
27+
%add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
28+
%cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
29+
store i32 0, ptr %cgep5, align 8
30+
%sub.i547.1 = add i32 %add11.i556606607, 1
31+
%and.i548.1 = and i32 %sub.i547.1, 1
32+
%cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
33+
%0 = load i32, ptr %cgep8, align 4
34+
store i32 %0, ptr %lsr.iv3, align 4
35+
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
36+
%cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
37+
%cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
38+
br i1 %cmp169.1, label %for.body170, label %for.body147
39+
}
40+
41+
...
42+
---
43+
name: test_swp_ws_live_intervals
44+
tracksRegLiveness: true
45+
body: |
46+
; CHECK-LABEL: name: test_swp_ws_live_intervals
47+
; CHECK: bb.0.entry:
48+
; CHECK-NEXT: successors: %bb.1(0x80000000)
49+
; CHECK-NEXT: liveins: $r0
50+
; CHECK-NEXT: {{ $}}
51+
; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
52+
; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: bb.1:
55+
; CHECK-NEXT: successors: %bb.5(0x80000000)
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.5:
58+
; CHECK-NEXT: successors: %bb.6(0x80000000)
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
61+
; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
62+
; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
63+
; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
64+
; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
65+
; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
66+
; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: bb.6:
69+
; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
72+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
73+
; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
74+
; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
75+
; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
76+
; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
77+
; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
78+
; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
79+
; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
80+
; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
81+
; CHECK-NEXT: {{ $}}
82+
; CHECK-NEXT: bb.7:
83+
; CHECK-NEXT: successors: %bb.3(0x80000000)
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
86+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
87+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
88+
; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
89+
; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
90+
; CHECK-NEXT: {{ $}}
91+
; CHECK-NEXT: bb.3:
92+
; CHECK-NEXT: successors: %bb.4(0x80000000)
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
95+
; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
96+
; CHECK-NEXT: {{ $}}
97+
; CHECK-NEXT: bb.4:
98+
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
101+
; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
102+
; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
103+
; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
104+
; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
105+
; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
106+
; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
107+
; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
108+
; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
109+
; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
110+
; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
111+
bb.0.entry:
112+
successors: %bb.1(0x80000000)
113+
liveins: $r0
114+
115+
%0:intregs = COPY $r0
116+
%1:intregs = S2_setbit_i %0, 0
117+
118+
bb.1:
119+
successors: %bb.2(0x80000000)
120+
121+
J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
122+
123+
bb.2:
124+
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
125+
126+
%2:intregs = PHI %1, %bb.1, %3, %bb.2
127+
%4:intregs = A2_andir %2, 1
128+
%5:intregs = S2_asl_i_r %4, 2
129+
S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
130+
%6:intregs = A2_tfrsi 1
131+
%7:intregs = A2_tfrsi 4
132+
%3:intregs = A2_tfrsi 0
133+
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
134+
J2_jump %bb.3, implicit-def dead $pc
135+
136+
bb.3:
137+
successors: %bb.4(0x80000000)
138+
139+
J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
140+
J2_jump %bb.4, implicit-def $pc
141+
142+
bb.4:
143+
successors: %bb.4(0x7c000000), %bb.1(0x04000000)
144+
145+
%8:intregs = PHI %7, %bb.3, %9, %bb.4
146+
%10:intregs = PHI %6, %bb.3, %11, %bb.4
147+
%11:intregs = A2_tfrsi 0
148+
S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
149+
%12:intregs = A2_addi %10, 1
150+
%13:intregs = S2_insert %5, %12, 1, 2
151+
%14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
152+
S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
153+
%9:intregs = A2_addi %8, 2
154+
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
155+
J2_jump %bb.1, implicit-def dead $pc
156+
157+
...

0 commit comments

Comments
 (0)