Skip to content

Commit 91c69c5

Browse files
committed
[AMDGPU] Allow rematerialization of instructions with virtual register uses
Change-Id: I638fae40762a7f6b9095c50090a247554632eb94
1 parent e77d428 commit 91c69c5

File tree

4 files changed

+1103
-10
lines changed

4 files changed

+1103
-10
lines changed

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1615,6 +1615,61 @@ void GCNSchedStage::revertScheduling() {
16151615
DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
16161616
}
16171617

1618+
bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
1619+
SlotIndex OriginalIdx,
1620+
SlotIndex RematIdx) const {
1621+
1622+
LiveIntervals *LIS = DAG.LIS;
1623+
MachineRegisterInfo &MRI = DAG.MRI;
1624+
OriginalIdx = OriginalIdx.getRegSlot(true);
1625+
RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
1626+
for (const MachineOperand &MO : InstToRemat->operands()) {
1627+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1628+
continue;
1629+
1630+
// Do not attempt to reason about PhysRegs
1631+
if (!MO.getReg().isVirtual()) {
1632+
assert(DAG.MRI.isConstantPhysReg(MO.getReg()) ||
1633+
DAG.TII->isIgnorableUse(MO));
1634+
continue;
1635+
}
1636+
1637+
LiveInterval &LI = LIS->getInterval(MO.getReg());
1638+
const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
1639+
assert(OVNI);
1640+
1641+
// Don't allow rematerialization immediately after the original def.
1642+
// It would be incorrect if InstToRemat redefines the register.
1643+
// See PR14098.
1644+
if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
1645+
return false;
1646+
1647+
if (OVNI != LI.getVNInfoAt(RematIdx))
1648+
return false;
1649+
1650+
// Check that subrange is live at RematIdx.
1651+
if (LI.hasSubRanges()) {
1652+
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
1653+
unsigned SubReg = MO.getSubReg();
1654+
LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
1655+
: MRI.getMaxLaneMaskForVReg(MO.getReg());
1656+
for (LiveInterval::SubRange &SR : LI.subranges()) {
1657+
if ((SR.LaneMask & LM).none())
1658+
continue;
1659+
if (!SR.liveAt(RematIdx))
1660+
return false;
1661+
1662+
// Early exit if all used lanes are checked. No need to continue.
1663+
LM &= ~SR.LaneMask;
1664+
if (LM.none())
1665+
break;
1666+
}
1667+
assert(LM.none());
1668+
}
1669+
}
1670+
return true;
1671+
}
1672+
16181673
void PreRARematStage::collectRematerializableInstructions() {
16191674
const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
16201675
for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1644,8 +1699,13 @@ void PreRARematStage::collectRematerializableInstructions() {
16441699
auto It = DAG.LiveIns[I].find(Reg);
16451700
if (It != DAG.LiveIns[I].end() && !It->second.none()) {
16461701
if (DAG.RegionsWithMinOcc[I]) {
1647-
RematerializableInsts[I][Def] = UseI;
1648-
AddedToRematList = true;
1702+
SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
1703+
SlotIndex UseIdx =
1704+
DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
1705+
if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
1706+
RematerializableInsts[I][Def] = UseI;
1707+
AddedToRematList = true;
1708+
}
16491709
}
16501710

16511711
// Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1779,27 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
17191779
Register DefReg = Def->getOperand(0).getReg();
17201780
TotalSinkableRegs +=
17211781
SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
1782+
#ifdef EXPENSIVE_CHECKS
1783+
// All uses are known to be available / live at the remat point. Thus, the
1784+
// uses should already be live-in to the region.
1785+
for (MachineOperand &MO : Def->operands()) {
1786+
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1787+
continue;
1788+
1789+
Register UseReg = MO.getReg();
1790+
if (!UseReg.isVirtual())
1791+
continue;
1792+
1793+
LiveInterval &LI = LIS->getInterval(UseReg);
1794+
LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
1795+
if (LI.hasSubRanges() && MO.getSubReg())
1796+
LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
1797+
1798+
assert(NewLiveIns[I].contains(UseReg));
1799+
LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
1800+
assert((LiveInMask & LM) == LM);
1801+
}
1802+
#endif
17221803
}
17231804
int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
17241805
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
@@ -1847,9 +1928,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
18471928
return false;
18481929

18491930
for (const MachineOperand &MO : MI.all_uses()) {
1850-
if (MO.getReg().isVirtual())
1851-
return false;
1852-
18531931
// We can't remat physreg uses, unless it is a constant or an ignorable
18541932
// use (e.g. implicit exec use on VALU instructions)
18551933
if (MO.getReg().isPhysical()) {

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,12 @@ class PreRARematStage : public GCNSchedStage {
456456
bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
457457
const TargetInstrInfo *TII);
458458

459+
/// \returns true if all the uses in \p InstToRemat defined at \p
460+
/// OriginalIdx are live at \p RematIdx. This only checks liveness of virtual
461+
/// reg uses.
462+
bool allUsesAvailableAt(const MachineInstr *InstToRemat,
463+
SlotIndex OriginalIdx, SlotIndex RematIdx) const;
464+
459465
public:
460466
bool initGCNSchedStage() override;
461467

0 commit comments

Comments
 (0)