Skip to content

Commit 16f7e96

Browse files
authored
[AMDGPU] Allow rematerialization of instructions with virtual register uses (#124327)
Remove the restriction that scheduling rematerialization candidates cannot have virtual register uses. Currently, this only allows virtual register uses that are already live at the rematerialization point, so bring in allUsesAvailableAt to check for this condition. Because of this condition, the uses of the rematerialized instructions will already be live-in to the region, so the remat won't increase live-in pressure. Add an assertion under EXPENSIVE_CHECKS to verify this condition.
1 parent f5d24e6 commit 16f7e96

File tree

4 files changed

+1407
-14
lines changed

4 files changed

+1407
-14
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 136 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,64 @@ void GCNSchedStage::revertScheduling() {
16151615
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
16161616
}
16171617

1618+
/// Check whether every register read by \p InstToRemat is still available,
/// with the same value, at \p RematIdx as at \p OriginalIdx.
///
/// Operands that are not registers, are the null register, or are not read
/// are skipped. A physical register use is accepted only when MRI reports it
/// as a constant physreg or TII reports the use as ignorable; any other
/// physical register use rejects the candidate. For a virtual register use,
/// the value number live at \p OriginalIdx must be the one live at
/// \p RematIdx, and if the interval has subranges, every subrange that
/// overlaps the lanes used by the operand must be live at \p RematIdx.
///
/// \param InstToRemat the instruction whose uses are examined.
/// \param OriginalIdx slot index of the instruction's current position.
/// \param RematIdx slot index of the prospective rematerialization point.
/// \returns true if all uses pass the checks above, false otherwise.
bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
1619+
SlotIndex OriginalIdx,
1620+
SlotIndex RematIdx) const {
1621+
1622+
LiveIntervals *LIS = DAG.LIS;
1623+
MachineRegisterInfo &MRI = DAG.MRI;
// Normalize both indexes with getRegSlot(true). RematIdx keeps whichever of
// the incoming index and its normalized form compares greater.
1624+
OriginalIdx = OriginalIdx.getRegSlot(true);
1625+
RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
1626+
for (const MachineOperand &MO : InstToRemat->operands()) {
// Only register operands that are actually read need to be checked.
1627+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1628+
continue;
1629+
1630+
if (!MO.getReg().isVirtual()) {
1631+
// Do not attempt to reason about PhysRegs
1632+
// TODO: better analysis of PhysReg liveness
1633+
if (!DAG.MRI.isConstantPhysReg(MO.getReg()) &&
1634+
!DAG.TII->isIgnorableUse(MO))
1635+
return false;
1636+
1637+
// Constant PhysRegs and IgnorableUses are okay
1638+
continue;
1639+
}
1640+
1641+
LiveInterval &LI = LIS->getInterval(MO.getReg());
1642+
const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
// The used register is expected to have a live value at the original
// position.
1643+
assert(OVNI);
1644+
1645+
// Don't allow rematerialization immediately after the original def.
1646+
// It would be incorrect if InstToRemat redefines the register.
1647+
// See PR14098.
// NOTE(review): this comparison does not depend on MO, but because it sits
// inside the loop it is only evaluated once a virtual register use is
// reached.
1648+
if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
1649+
return false;
1650+
// The value number at the remat point must match the one seen originally.
1651+
if (OVNI != LI.getVNInfoAt(RematIdx))
1652+
return false;
1653+
1654+
// Check that subrange is live at RematIdx.
1655+
if (LI.hasSubRanges()) {
1656+
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1657+
unsigned SubReg = MO.getSubReg();
// LM holds the lanes this operand uses: the subregister's lane mask when a
// subregister index is present, otherwise the register's maximal lane mask.
1658+
LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
1659+
: MRI.getMaxLaneMaskForVReg(MO.getReg());
1660+
for (LiveInterval::SubRange &SR : LI.subranges()) {
// Subranges that share no lanes with the operand are irrelevant.
1661+
if ((SR.LaneMask & LM).none())
1662+
continue;
1663+
if (!SR.liveAt(RematIdx))
1664+
return false;
1665+
1666+
// Early exit if all used lanes are checked. No need to continue.
1667+
LM &= ~SR.LaneMask;
1668+
if (LM.none())
1669+
break;
1670+
}
1671+
}
1672+
}
1673+
return true;
1674+
}
1675+
16181676
void PreRARematStage::collectRematerializableInstructions() {
16191677
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
16201678
for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1636,6 +1694,47 @@ void PreRARematStage::collectRematerializableInstructions() {
16361694
if (Def->getParent() == UseI->getParent())
16371695
continue;
16381696

1697+
bool HasRematDependency = false;
1698+
// Check if this instruction uses any registers that are planned to be
1699+
// rematerialized
1700+
for (auto &RematEntry : RematerializableInsts) {
1701+
if (find_if(RematEntry.second,
1702+
[&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
1703+
for (MachineOperand &MO : Def->operands()) {
1704+
if (!MO.isReg())
1705+
continue;
1706+
if (MO.getReg() == Remat.first->getOperand(0).getReg())
1707+
return true;
1708+
}
1709+
return false;
1710+
}) != RematEntry.second.end()) {
1711+
HasRematDependency = true;
1712+
break;
1713+
}
1714+
}
1715+
// Do not rematerialize an instruction if it uses an instruction that we
1716+
// have designated for rematerialization.
1717+
// FIXME: Allow for rematerialization chains: this requires 1. updating
1718+
// remat points to account for uses that are rematerialized, and 2. either
1719+
// rematerializing the candidates in careful ordering, or deferring the MBB
1720+
// RP walk until the entire chain has been rematerialized.
1721+
if (HasRematDependency)
1722+
continue;
1723+
1724+
// Similarly, check if UseI itself is planned to be rematerialized.
1725+
for (auto &RematEntry : RematerializableInsts) {
1726+
if (find_if(RematEntry.second,
1727+
[&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
1728+
return Remat.first == UseI;
1729+
}) != RematEntry.second.end()) {
1730+
HasRematDependency = true;
1731+
break;
1732+
}
1733+
}
1734+
1735+
if (HasRematDependency)
1736+
break;
1737+
16391738
// We are only collecting defs that are defined in another block and are
16401739
// live-through or used inside regions at MinOccupancy. This means that the
16411740
// register must be in the live-in set for the region.
@@ -1644,8 +1743,13 @@ void PreRARematStage::collectRematerializableInstructions() {
16441743
auto It = DAG.LiveIns[I].find(Reg);
16451744
if (It != DAG.LiveIns[I].end() && !It->second.none()) {
16461745
if (DAG.RegionsWithMinOcc[I]) {
1647-
RematerializableInsts[I][Def] = UseI;
1648-
AddedToRematList = true;
1746+
SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
1747+
SlotIndex UseIdx =
1748+
DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
1749+
if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
1750+
RematerializableInsts[I][Def] = UseI;
1751+
AddedToRematList = true;
1752+
}
16491753
}
16501754

16511755
// Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1823,35 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
17191823
Register DefReg = Def->getOperand(0).getReg();
17201824
TotalSinkableRegs +=
17211825
SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1826+
#ifdef EXPENSIVE_CHECKS
1827+
// All uses are known to be available / live at the remat point. Thus, the
1828+
// uses should already be live in to the region.
1829+
for (MachineOperand &MO : Def->operands()) {
1830+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1831+
continue;
1832+
1833+
Register UseReg = MO.getReg();
1834+
if (!UseReg.isVirtual())
1835+
continue;
1836+
1837+
LiveInterval &LI = LIS->getInterval(UseReg);
1838+
LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
1839+
if (LI.hasSubRanges() && MO.getSubReg())
1840+
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
1841+
1842+
assert(NewLiveIns[I].contains(UseReg));
1843+
LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
1844+
LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
1845+
// If this register has lanes not covered by the LiveIns, be sure they
1846+
// do not map to any subrange. ref:
1847+
// machine-scheduler-sink-trivial-remats.mir::omitted_subrange
1848+
if (UncoveredLanes.any()) {
1849+
assert(LI.hasSubRanges());
1850+
for (LiveInterval::SubRange &SR : LI.subranges())
1851+
assert((SR.LaneMask & UncoveredLanes).none());
1852+
}
1853+
}
1854+
#endif
17221855
}
17231856
int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
17241857
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
@@ -1734,10 +1867,7 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
17341867
MachineBasicBlock::iterator InsertPos =
17351868
MachineBasicBlock::iterator(It.second);
17361869
Register Reg = Def->getOperand(0).getReg();
1737-
// Rematerialize MI to its use block. Since we are only rematerializing
1738-
// instructions that do not have any virtual reg uses, we do not need to
1739-
// call LiveRangeEdit::allUsesAvailableAt() and
1740-
// LiveRangeEdit::canRematerializeAt().
1870+
// Rematerialize MI to its use block.
17411871
TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
17421872
Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
17431873
MachineInstr *NewMI = &*std::prev(InsertPos);
@@ -1847,9 +1977,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
18471977
return false;
18481978

18491979
for (const MachineOperand &MO : MI.all_uses()) {
1850-
if (MO.getReg().isVirtual())
1851-
return false;
1852-
18531980
// We can't remat physreg uses, unless it is a constant or an ignorable
18541981
// use (e.g. implicit exec use on VALU instructions)
18551982
if (MO.getReg().isPhysical()) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,12 @@ class PreRARematStage : public GCNSchedStage {
456456
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
457457
const TargetInstrInfo *TII);
458458

459+
/// \returns true if all the uses in \p InstToRemat defined at \p
460+
/// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual
461+
/// reg uses.
462+
bool allUsesAvailableAt(const MachineInstr *InstToRemat,
463+
SlotIndex OriginalIdx, SlotIndex RematIdx) const;
464+
459465
public:
460466
bool initGCNSchedStage() override;
461467

0 commit comments

Comments
 (0)