Skip to content

Commit 86c1c59

Browse files
committed
[MachinePipeliner] Improve loop carried dependence analysis
The previous implementation had false positive/negative cases in analysis of loop carried dependence. A case of missed dependencies is caused by incorrect analysis of address increments. It is fixed by a strict analysis of recursive definitions. See added test swp-carried-dep4.mir. Excessive detection of dependence is corrected by improving the formula for determining overlap of address ranges to be accessed. See added test swp-carried-dep5.mir.
1 parent f779ec7 commit 86c1c59

File tree

8 files changed

+393
-85
lines changed

8 files changed

+393
-85
lines changed

llvm/include/llvm/CodeGen/MachinePipeliner.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
255255
return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
256256
}
257257

258-
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
258+
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool IsSucc = true);
259259

260260
/// The distance function, which indicates that operation V of iteration I
261261
/// depends on operations U of iteration I-distance.
@@ -287,6 +287,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
287287

288288
static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
289289

290+
bool mayOverlapInLaterIter(const MachineInstr *MIA, const MachineInstr *MIB);
291+
290292
private:
291293
void addLoopCarriedDependences(AAResults *AA);
292294
void updatePhiDependences();
@@ -306,7 +308,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
306308
void computeNodeOrder(NodeSetType &NodeSets);
307309
void checkValidNodeOrder(const NodeSetType &Circuits) const;
308310
bool schedulePipeline(SMSchedule &Schedule);
309-
bool computeDelta(MachineInstr &MI, unsigned &Delta);
311+
bool computeDelta(const MachineInstr &MI, int &Delta);
310312
MachineInstr *findDefInLoop(Register Reg);
311313
bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
312314
unsigned &OffsetPos, unsigned &NewBase,

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 184 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -2507,9 +2507,98 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
25072507
return scheduleFound && Schedule.getMaxStageCount() > 0;
25082508
}
25092509

2510+
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI) {
2511+
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
2512+
Register Result;
2513+
for (const MachineOperand &Use : MI.all_uses()) {
2514+
Register Reg = Use.getReg();
2515+
if (!Reg.isVirtual())
2516+
return Register();
2517+
if (MRI.getVRegDef(Reg)->getParent() != MI.getParent())
2518+
continue;
2519+
if (Result)
2520+
return Register();
2521+
Result = Reg;
2522+
}
2523+
return Result;
2524+
}
2525+
2526+
/// When Op is a value that is incremented recursively in a loop and there is a
2527+
/// unique instruction that increments it, returns true and sets Value.
2528+
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value) {
2529+
if (!Op.isReg() || !Op.getReg().isVirtual())
2530+
return false;
2531+
2532+
Register OrgReg = Op.getReg();
2533+
Register CurReg = OrgReg;
2534+
const MachineBasicBlock *LoopBB = Op.getParent()->getParent();
2535+
const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
2536+
2537+
const TargetInstrInfo *TII =
2538+
LoopBB->getParent()->getSubtarget().getInstrInfo();
2539+
const TargetRegisterInfo *TRI =
2540+
LoopBB->getParent()->getSubtarget().getRegisterInfo();
2541+
2542+
MachineInstr *Phi = nullptr;
2543+
MachineInstr *Increment = nullptr;
2544+
2545+
// Traverse definitions until it reaches Op or an instruction that does not
2546+
// satisfy the condition
2547+
while (true) {
2548+
if (!CurReg.isValid() || !CurReg.isVirtual())
2549+
return false;
2550+
MachineInstr *Def = MRI.getVRegDef(CurReg);
2551+
if (Def->getParent() != LoopBB)
2552+
return false;
2553+
2554+
if (Def->isCopy()) {
2555+
// Ignore copy instructions unless they contain subregisters
2556+
if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2557+
return false;
2558+
CurReg = Def->getOperand(1).getReg();
2559+
} else if (Def->isPHI()) {
2560+
// There must be just one Phi
2561+
if (Phi)
2562+
return false;
2563+
Phi = Def;
2564+
CurReg = getLoopPhiReg(*Def, LoopBB);
2565+
} else if (TII->getIncrementValue(*Def, Value)) {
2566+
// Potentially a unique increment
2567+
if (Increment)
2568+
// Multiple increments exist
2569+
return false;
2570+
2571+
const MachineOperand *BaseOp;
2572+
int64_t Offset;
2573+
bool OffsetIsScalable;
2574+
if (TII->getMemOperandWithOffset(*Def, BaseOp, Offset, OffsetIsScalable,
2575+
TRI)) {
2576+
// Pre/post increment instruction
2577+
CurReg = BaseOp->getReg();
2578+
} else {
2579+
// If only one of the operands is defined within the loop, it is assumed
2580+
// to be an incremented value.
2581+
CurReg = findUniqueOperandDefinedInLoop(*Def);
2582+
if (!CurReg.isValid())
2583+
return false;
2584+
}
2585+
Increment = Def;
2586+
} else {
2587+
return false;
2588+
}
2589+
if (CurReg == OrgReg)
2590+
break;
2591+
}
2592+
2593+
if (!Phi || !Increment)
2594+
return false;
2595+
2596+
return true;
2597+
}
2598+
25102599
/// Return true if we can compute the amount the instruction changes
25112600
/// during each iteration. Set Delta to the amount of the change.
2512-
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2601+
bool SwingSchedulerDAG::computeDelta(const MachineInstr &MI, int &Delta) {
25132602
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
25142603
const MachineOperand *BaseOp;
25152604
int64_t Offset;
@@ -2524,24 +2613,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
25242613
if (!BaseOp->isReg())
25252614
return false;
25262615

2527-
Register BaseReg = BaseOp->getReg();
2528-
2529-
MachineRegisterInfo &MRI = MF.getRegInfo();
2530-
// Check if there is a Phi. If so, get the definition in the loop.
2531-
MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2532-
if (BaseDef && BaseDef->isPHI()) {
2533-
BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2534-
BaseDef = MRI.getVRegDef(BaseReg);
2535-
}
2536-
if (!BaseDef)
2537-
return false;
2538-
2539-
int D = 0;
2540-
if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2541-
return false;
2542-
2543-
Delta = D;
2544-
return true;
2616+
return findLoopIncrementValue(*BaseOp, Delta);
25452617
}
25462618

25472619
/// Check if we can change the instruction to use an offset value from the
@@ -2659,11 +2731,101 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
26592731
return Def;
26602732
}
26612733

2734+
/// Return false if there is no overlap between the region accessed by BaseMI in
2735+
/// an iteration and the region accessed by OtherMI in subsequent iterations.
2736+
bool SwingSchedulerDAG::mayOverlapInLaterIter(const MachineInstr *BaseMI,
2737+
const MachineInstr *OtherMI) {
2738+
int DeltaB, DeltaO, Delta;
2739+
if (!computeDelta(*BaseMI, DeltaB) || !computeDelta(*OtherMI, DeltaO) ||
2740+
DeltaB != DeltaO)
2741+
return true;
2742+
Delta = DeltaB;
2743+
2744+
const MachineOperand *BaseOpB, *BaseOpO;
2745+
int64_t OffsetB, OffsetO;
2746+
bool OffsetBIsScalable, OffsetOIsScalable;
2747+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2748+
if (!TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
2749+
OffsetBIsScalable, TRI) ||
2750+
!TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
2751+
OffsetOIsScalable, TRI))
2752+
return true;
2753+
2754+
if (OffsetBIsScalable || OffsetOIsScalable)
2755+
return true;
2756+
2757+
if (!BaseOpB->isIdenticalTo(*BaseOpO)) {
2758+
// Pass cases with different base operands but same initial values.
2759+
// Typically for when pre/post increment is used.
2760+
2761+
if (!BaseOpB->isReg() || !BaseOpO->isReg())
2762+
return true;
2763+
Register RegB = BaseOpB->getReg(), RegO = BaseOpO->getReg();
2764+
if (!RegB.isVirtual() || !RegO.isVirtual())
2765+
return true;
2766+
2767+
MachineInstr *DefB = MRI.getVRegDef(BaseOpB->getReg());
2768+
MachineInstr *DefO = MRI.getVRegDef(BaseOpO->getReg());
2769+
if (!DefB || !DefO || !DefB->isPHI() || !DefO->isPHI())
2770+
return true;
2771+
2772+
unsigned InitValB = 0;
2773+
unsigned LoopValB = 0;
2774+
unsigned InitValO = 0;
2775+
unsigned LoopValO = 0;
2776+
getPhiRegs(*DefB, BB, InitValB, LoopValB);
2777+
getPhiRegs(*DefO, BB, InitValO, LoopValO);
2778+
MachineInstr *InitDefB = MRI.getVRegDef(InitValB);
2779+
MachineInstr *InitDefO = MRI.getVRegDef(InitValO);
2780+
2781+
if (!InitDefB->isIdenticalTo(*InitDefO))
2782+
return true;
2783+
}
2784+
2785+
LocationSize AccessSizeB = (*BaseMI->memoperands_begin())->getSize();
2786+
LocationSize AccessSizeO = (*OtherMI->memoperands_begin())->getSize();
2787+
2788+
// This is the main test, which checks the offset values and the loop
2789+
// increment value to determine if the accesses may be loop carried.
2790+
if (!AccessSizeB.hasValue() || !AccessSizeO.hasValue())
2791+
return true;
2792+
2793+
LLVM_DEBUG({
2794+
dbgs() << "Overlap check:\n";
2795+
dbgs() << " BaseMI: ";
2796+
BaseMI->dump();
2797+
dbgs() << " Base + " << OffsetB << " + I * " << Delta
2798+
<< ", Len: " << AccessSizeB.getValue() << "\n";
2799+
dbgs() << " OtherMI: ";
2800+
OtherMI->dump();
2801+
dbgs() << " Base + " << OffsetO << " + I * " << Delta
2802+
<< ", Len: " << AccessSizeO.getValue() << "\n";
2803+
});
2804+
2805+
if (Delta < 0) {
2806+
int64_t BaseMinAddr = OffsetB;
2807+
int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue() - 1;
2808+
if (BaseMinAddr > OhterNextIterMaxAddr) {
2809+
LLVM_DEBUG(dbgs() << " Result: No overlap\n");
2810+
return false;
2811+
}
2812+
} else {
2813+
int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue() - 1;
2814+
int64_t OtherNextIterMinAddr = OffsetO + Delta;
2815+
if (BaseMaxAddr < OtherNextIterMinAddr) {
2816+
LLVM_DEBUG(dbgs() << " Result: No overlap\n");
2817+
return false;
2818+
}
2819+
}
2820+
LLVM_DEBUG(dbgs() << " Result: Overlap\n");
2821+
return true;
2822+
}
2823+
26622824
/// Return true for an order or output dependence that is loop carried
26632825
/// potentially. A dependence is loop carried if the destination defines a value
26642826
/// that may be used or defined by the source in a subsequent iteration.
26652827
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
2666-
bool isSucc) {
2828+
bool IsSucc) {
26672829
if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
26682830
Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
26692831
return false;
@@ -2676,7 +2838,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
26762838

26772839
MachineInstr *SI = Source->getInstr();
26782840
MachineInstr *DI = Dep.getSUnit()->getInstr();
2679-
if (!isSucc)
2841+
if (!IsSucc)
26802842
std::swap(SI, DI);
26812843
assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
26822844

@@ -2692,61 +2854,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
26922854
// The conservative assumption is that a dependence between memory operations
26932855
// may be loop carried. The following code checks when it can be proved that
26942856
// there is no loop carried dependence.
2695-
unsigned DeltaS, DeltaD;
2696-
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2697-
return true;
2698-
2699-
const MachineOperand *BaseOpS, *BaseOpD;
2700-
int64_t OffsetS, OffsetD;
2701-
bool OffsetSIsScalable, OffsetDIsScalable;
2702-
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2703-
if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2704-
TRI) ||
2705-
!TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2706-
TRI))
2707-
return true;
2708-
2709-
assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2710-
"Expected offsets to be byte offsets");
2711-
2712-
MachineInstr *DefS = MRI.getVRegDef(BaseOpS->getReg());
2713-
MachineInstr *DefD = MRI.getVRegDef(BaseOpD->getReg());
2714-
if (!DefS || !DefD || !DefS->isPHI() || !DefD->isPHI())
2715-
return true;
2716-
2717-
unsigned InitValS = 0;
2718-
unsigned LoopValS = 0;
2719-
unsigned InitValD = 0;
2720-
unsigned LoopValD = 0;
2721-
getPhiRegs(*DefS, BB, InitValS, LoopValS);
2722-
getPhiRegs(*DefD, BB, InitValD, LoopValD);
2723-
MachineInstr *InitDefS = MRI.getVRegDef(InitValS);
2724-
MachineInstr *InitDefD = MRI.getVRegDef(InitValD);
2725-
2726-
if (!InitDefS->isIdenticalTo(*InitDefD))
2727-
return true;
2728-
2729-
// Check that the base register is incremented by a constant value for each
2730-
// iteration.
2731-
MachineInstr *LoopDefS = MRI.getVRegDef(LoopValS);
2732-
int D = 0;
2733-
if (!LoopDefS || !TII->getIncrementValue(*LoopDefS, D))
2734-
return true;
2735-
2736-
LocationSize AccessSizeS = (*SI->memoperands_begin())->getSize();
2737-
LocationSize AccessSizeD = (*DI->memoperands_begin())->getSize();
2738-
2739-
// This is the main test, which checks the offset values and the loop
2740-
// increment value to determine if the accesses may be loop carried.
2741-
if (!AccessSizeS.hasValue() || !AccessSizeD.hasValue())
2742-
return true;
2743-
2744-
if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue() ||
2745-
DeltaD < AccessSizeD.getValue())
2746-
return true;
2747-
2748-
return (OffsetS + (int64_t)AccessSizeS.getValue() <
2749-
OffsetD + (int64_t)AccessSizeD.getValue());
2857+
return mayOverlapInLaterIter(DI, SI);
27502858
}
27512859

27522860
void SwingSchedulerDAG::postProcessDAG() {

llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33

44
# Test that the loop carried dependence check correctly identifies a recurrence.
55

6+
# CHECK: Overlap check:
7+
# CHECK-NEXT: BaseMI: S2_storerh_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s16) into %ir.lsr.iv24)
8+
# CHECK-NEXT: Base + 0 + I * 4, Len: 2
9+
# CHECK-NEXT: OtherMI: %{{[0-9]+}}:intregs = L2_loadrh_io %{{[0-9]+}}:intregs, -8 :: (load (s16) from %ir.cgep10)
10+
# CHECK-NEXT: Base + -8 + I * 4, Len: 2
11+
# CHECK-NEXT: Result: Overlap
12+
613
# CHECK: Rec NodeSet
714
# CHECK: Rec NodeSet
815
# CHECK: Rec NodeSet

llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
22
# REQUIRES: asserts
33

4-
# Test that the loop carried dependence check correctly identifies a recurrence
4+
# Test that it correctly recognizes that there is no loop carried dependence
55
# when the loop variable decreases and the array index offset is negative.
66

7-
# CHECK: Rec NodeSet
8-
# CHECK: Rec NodeSet
9-
# CHECK: SU(3)
10-
# CHECK: SU(4)
11-
# CHECK: SU(5)
7+
# CHECK: Overlap check:
8+
# CHECK-NEXT: BaseMI: S2_storeri_io %{{[0-9]+}}:intregs, 0, %{{[0-9]+}}:intregs :: (store (s32) into %ir.lsr.iv1)
9+
# CHECK-NEXT: Base + 0 + I * -4, Len: 4
10+
# CHECK-NEXT: OtherMI: %{{[0-9]+}}:intregs = L2_loadri_io %{{[0-9]+}}:intregs, -8 :: (load (s32) from %ir.cgep)
11+
# CHECK-NEXT: Base + -8 + I * -4, Len: 4
12+
# CHECK-NEXT: Result: No overlap
1213

1314
--- |
1415

llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@
77
# requires to use a single CHECK-NOT to match such a Rec NodeSet. Fortunately
88
# the atom '.' does not match a newline but anything else on a line.
99

10+
# CHECK: Overlap check:
11+
# CHECK-NEXT: BaseMI: %13:intregs = S2_storerh_pi %12:intregs(tied-def 0), 2, %20:intregs :: (store (s16))
12+
# CHECK-NEXT: Base + 0 + I * 2, Len: 2
13+
# CHECK-NEXT: OtherMI: %19:intregs, %15:intregs = L2_loadrh_pi %14:intregs(tied-def 1), 2 :: (load (s16))
14+
# CHECK-NEXT: Base + 0 + I * 2, Len: 2
15+
# CHECK-NEXT: Result: No overlap
16+
1017
# CHECK-NOT: Rec NodeSet{{.+[[:space:]]}} SU(5){{.+[[:space:]]}} SU(7)
1118

1219
...

0 commit comments

Comments
 (0)