Skip to content

Commit e134b60

Browse files
committed
[AMDGPU] Allow rematerialization of instructions with virtual register uses
Change-Id: Ib0e74e4abf1aacd1f36bce8a09db618d5f40165e
1 parent e06b703 commit e134b60

File tree

4 files changed

+1096
-68
lines changed

4 files changed

+1096
-68
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 82 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,59 @@ void GCNSchedStage::revertScheduling() {
16151615
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
16161616
}
16171617

1618+
/// allUsesAvailableAt - Return true if all registers used by InstToRemat at
1619+
/// OriginalIdx are also available with the same value at RematIdx.
///
/// \param InstToRemat  Instruction whose register operands are inspected.
/// \param OriginalIdx  Slot index of the instruction's current position.
/// \param RematIdx     Slot index of the candidate rematerialization point.
/// \returns false as soon as one read virtual-register operand either has a
///          different value number at RematIdx or has a used-lane subrange
///          that is not live there; true otherwise. Non-register operands,
///          null registers, operands that are not read, and non-virtual
///          registers are skipped and never cause a false result.
1620+
bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
1621+
SlotIndex OriginalIdx,
1622+
SlotIndex RematIdx) const {
1623+
1624+
LiveIntervals *LIS = DAG.LIS;
1625+
MachineRegisterInfo &MRI = DAG.MRI;
1626+
// Query the original position at the slot returned by getRegSlot(true)
// (presumably the early-clobber register slot -- confirm against SlotIndex).
OriginalIdx = OriginalIdx.getRegSlot(true);
1627+
// Use the later of RematIdx itself and its getRegSlot(true) slot, so an
// index that is already past that slot is left unchanged.
RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
1628+
for (const MachineOperand &MO : InstToRemat->operands()) {
1629+
// Only register operands that are actually read are of interest.
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1630+
continue;
1631+
1632+
// Non-virtual (e.g. physical) registers are not checked by this function.
if (!MO.getReg().isVirtual())
1633+
continue;
1634+
1635+
LiveInterval &LI = LIS->getInterval(MO.getReg());
1636+
// Value number of the used register at the original position.
const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
1637+
// A register read by the instruction is expected to be live where it is
// read.
assert(OVNI);
1638+
1639+
// Don't allow rematerialization immediately after the original def.
1640+
// It would be incorrect if InstToRemat redefines the register.
1641+
// See PR14098.
// NOTE(review): this test does not depend on MO, but it is only reached
// once a read virtual-register operand has been found; instructions without
// such operands never hit it.
1642+
if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
1643+
return false;
1644+
1645+
// The value number at the remat point must be the very same one that is
// live at the original position.
if (OVNI != LI.getVNInfoAt(RematIdx))
1646+
return false;
1647+
1648+
// Check that subrange is live at RematIdx.
1649+
if (LI.hasSubRanges()) {
1650+
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1651+
unsigned SubReg = MO.getSubReg();
1652+
// Lanes read by this operand: the sub-register's lanes when a
// sub-register index is present, otherwise every lane of the register.
LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
1653+
: MRI.getMaxLaneMaskForVReg(MO.getReg());
1654+
for (LiveInterval::SubRange &SR : LI.subranges()) {
1655+
// Skip subranges that cover none of the lanes still to be checked.
if ((SR.LaneMask & LM).none())
1656+
continue;
1657+
if (!SR.liveAt(RematIdx))
1658+
return false;
1659+
1660+
// Early exit if all used lanes are checked. No need to continue.
1661+
LM &= ~SR.LaneMask;
1662+
if (LM.none())
1663+
break;
1664+
}
1665+
// Every used lane must have been covered by some subrange.
assert(LM.none());
1666+
}
1667+
}
1668+
return true;
1669+
}
1670+
16181671
void PreRARematStage::collectRematerializableInstructions() {
16191672
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
16201673
for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1629,7 +1682,7 @@ void PreRARematStage::collectRematerializableInstructions() {
16291682

16301683
MachineOperand *Op = DAG.MRI.getOneDef(Reg);
16311684
MachineInstr *Def = Op->getParent();
1632-
if (Op->getSubReg() != 0 || !isTriviallyReMaterializable(*Def))
1685+
if (Op->getSubReg() != 0 || !DAG.TII->isTriviallyReMaterializable(*Def))
16331686
continue;
16341687

16351688
MachineInstr *UseI = &*DAG.MRI.use_instr_nodbg_begin(Reg);
@@ -1644,8 +1697,13 @@ void PreRARematStage::collectRematerializableInstructions() {
16441697
auto It = DAG.LiveIns[I].find(Reg);
16451698
if (It != DAG.LiveIns[I].end() && !It->second.none()) {
16461699
if (DAG.RegionsWithMinOcc[I]) {
1647-
RematerializableInsts[I][Def] = UseI;
1648-
AddedToRematList = true;
1700+
SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
1701+
SlotIndex UseIdx =
1702+
DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
1703+
if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
1704+
RematerializableInsts[I][Def] = UseI;
1705+
AddedToRematList = true;
1706+
}
16491707
}
16501708

16511709
// Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1777,27 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
17191777
Register DefReg = Def->getOperand(0).getReg();
17201778
TotalSinkableRegs +=
17211779
SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1780+
#ifdef EXPENSIVE_CHECKS
1781+
// All uses are known to be available / live at the remat point. Thus, the
1782+
// uses should already be live in to the region.
1783+
for (MachineOperand &MO : Def->operands()) {
1784+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1785+
continue;
1786+
1787+
Register UseReg = MO.getReg();
1788+
if (!UseReg.isVirtual())
1789+
continue;
1790+
1791+
LiveInterval &LI = LIS->getInterval(UseReg);
1792+
LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
1793+
if (LI.hasSubRanges() && MO.getSubReg())
1794+
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
1795+
1796+
assert(NewLiveIns[I].contains(UseReg));
1797+
LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
1798+
assert((LiveInMask & LM) == LM);
1799+
}
1800+
#endif
17221801
}
17231802
int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
17241803
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
@@ -1842,18 +1921,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
18421921
return true;
18431922
}
18441923

1845-
// Copied from MachineLICM
1846-
bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1847-
if (!DAG.TII->isTriviallyReMaterializable(MI))
1848-
return false;
1849-
1850-
for (const MachineOperand &MO : MI.all_uses())
1851-
if (MO.getReg().isVirtual())
1852-
return false;
1853-
1854-
return true;
1855-
}
1856-
18571924
// When removing, we will have to check both beginning and ending of the region.
18581925
// When inserting, we will only have to check if we are inserting NewMI in front
18591926
// of a scheduling region and do not need to check the ending since we will only

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -448,14 +448,15 @@ class PreRARematStage : public GCNSchedStage {
448448
// and single use outside the defining block into RematerializableInsts.
449449
void collectRematerializableInstructions();
450450

451-
bool isTriviallyReMaterializable(const MachineInstr &MI);
452-
453451
// TODO: Should also attempt to reduce RP of SGPRs and AGPRs
454452
// Attempt to reduce RP of VGPR by sinking trivially rematerializable
455453
// instructions. Returns true if we were able to sink instruction(s).
456454
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
457455
const TargetInstrInfo *TII);
458456

457+
bool allUsesAvailableAt(const MachineInstr *InstToRemat,
458+
SlotIndex OriginalIdx, SlotIndex RematIdx) const;
459+
459460
public:
460461
bool initGCNSchedStage() override;
461462

0 commit comments

Comments
 (0)