
[AMDGPU] Allow rematerialization of instructions with virtual register uses #124327

Merged
merged 5 commits on Feb 6, 2025
145 changes: 136 additions & 9 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1615,6 +1615,64 @@ void GCNSchedStage::revertScheduling() {
  DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
}

bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
                                         SlotIndex OriginalIdx,
                                         SlotIndex RematIdx) const {
  LiveIntervals *LIS = DAG.LIS;
  MachineRegisterInfo &MRI = DAG.MRI;
  OriginalIdx = OriginalIdx.getRegSlot(true);
  RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
  for (const MachineOperand &MO : InstToRemat->operands()) {
    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
      continue;

    if (!MO.getReg().isVirtual()) {
      // Do not attempt to reason about PhysRegs.
      // TODO: better analysis of PhysReg liveness.
      if (!DAG.MRI.isConstantPhysReg(MO.getReg()) &&
          !DAG.TII->isIgnorableUse(MO))
        return false;

      // Constant PhysRegs and IgnorableUses are okay.
      continue;
    }

    LiveInterval &LI = LIS->getInterval(MO.getReg());
    const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
    assert(OVNI);

    // Don't allow rematerialization immediately after the original def.
    // It would be incorrect if InstToRemat redefines the register.
    // See PR14098.
    if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
      return false;

    if (OVNI != LI.getVNInfoAt(RematIdx))
      return false;

    // Check that the subrange is live at RematIdx.
    if (LI.hasSubRanges()) {
      const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
      unsigned SubReg = MO.getSubReg();
      LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
                              : MRI.getMaxLaneMaskForVReg(MO.getReg());
      for (LiveInterval::SubRange &SR : LI.subranges()) {
        if ((SR.LaneMask & LM).none())
          continue;
        if (!SR.liveAt(RematIdx))
          return false;

        // Early exit if all used lanes are checked. No need to continue.
        LM &= ~SR.LaneMask;
        if (LM.none())
          break;
      }
    }
  }
  return true;
}
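The core of the virtual-register path above is value-number equality: the def that reaches the original instruction's read must be the same one that reaches the remat point. A minimal standalone sketch of that idea, assuming a LiveIntervals analysis and a virtual register Reg (the helper name is hypothetical):

#include "llvm/CodeGen/LiveIntervals.h"

// Hypothetical helper: does the value of Reg read by the original
// instruction at OrigIdx reach RematIdx unchanged?
static bool sameValueAt(const llvm::LiveIntervals &LIS, llvm::Register Reg,
                        llvm::SlotIndex OrigIdx, llvm::SlotIndex RematIdx) {
  const llvm::LiveInterval &LI = LIS.getInterval(Reg);
  // Reads are resolved at the early register slot of the original
  // instruction, mirroring OriginalIdx.getRegSlot(true) above.
  const llvm::VNInfo *OrigVN = LI.getVNInfoAt(OrigIdx.getRegSlot(true));
  // The same value number at both points means no other def of Reg
  // intervenes, so a rematerialized clone would read exactly the same value.
  return OrigVN && OrigVN == LI.getVNInfoAt(RematIdx);
}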

void PreRARematStage::collectRematerializableInstructions() {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
  for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
@@ -1636,6 +1694,47 @@ void PreRARematStage::collectRematerializableInstructions()
    if (Def->getParent() == UseI->getParent())
      continue;

    bool HasRematDependency = false;
    // Check if this instruction uses any registers that are planned to be
    // rematerialized.
    for (auto &RematEntry : RematerializableInsts) {
      if (find_if(RematEntry.second,
                  [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
                    for (MachineOperand &MO : Def->operands()) {
                      if (!MO.isReg())
                        continue;
                      if (MO.getReg() == Remat.first->getOperand(0).getReg())
                        return true;
                    }
                    return false;
                  }) != RematEntry.second.end()) {
        HasRematDependency = true;
        break;
      }
    }
    // Do not rematerialize an instruction if it uses an instruction that we
    // have designated for rematerialization.
    // FIXME: Allow for rematerialization chains: this requires 1. updating
    // remat points to account for uses that are rematerialized, and 2. either
    // rematerializing the candidates in careful ordering, or deferring the MBB
    // RP walk until the entire chain has been rematerialized.
    if (HasRematDependency)
      continue;

    // Similarly, check if UseI is itself planned to be rematerialized.
    for (auto &RematEntry : RematerializableInsts) {
      if (find_if(RematEntry.second,
                  [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
                    return Remat.first == UseI;
                  }) != RematEntry.second.end()) {
        HasRematDependency = true;
        break;
      }
    }

    if (HasRematDependency)
      break;

    // We are only collecting defs that are defined in another block and are
    // live-through or used inside regions at MinOccupancy. This means that the
    // register must be in the live-in set for the region.
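The two guards above implement a conservative no-chains policy: a candidate Def is dropped when it reads a value already queued for rematerialization, and collection stops when its single user is itself queued. A compact sketch of the first guard's predicate, with a hypothetical helper and a flat list standing in for the per-region RematerializableInsts map:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineInstr.h"

// Hypothetical helper: true if Def consumes the result of any instruction
// already planned for rematerialization (each planned instruction defines
// its result register in operand 0).
static bool usesPlannedRemat(const llvm::MachineInstr &Def,
                             llvm::ArrayRef<const llvm::MachineInstr *> Planned) {
  for (const llvm::MachineOperand &MO : Def.operands()) {
    if (!MO.isReg() || !MO.getReg())
      continue;
    for (const llvm::MachineInstr *R : Planned)
      if (MO.getReg() == R->getOperand(0).getReg())
        return true;
  }
  return false;
}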
@@ -1644,8 +1743,13 @@ void PreRARematStage::collectRematerializableInstructions()
      auto It = DAG.LiveIns[I].find(Reg);
      if (It != DAG.LiveIns[I].end() && !It->second.none()) {
        if (DAG.RegionsWithMinOcc[I]) {
          SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
          SlotIndex UseIdx =
              DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
          if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
            RematerializableInsts[I][Def] = UseI;
            AddedToRematList = true;
          }
        }

        // Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1823,35 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
      Register DefReg = Def->getOperand(0).getReg();
      TotalSinkableRegs +=
          SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
#ifdef EXPENSIVE_CHECKS
      // All uses are known to be available / live at the remat point. Thus,
      // the uses should already be live-in to the region.
      for (MachineOperand &MO : Def->operands()) {
        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
          continue;

        Register UseReg = MO.getReg();
        if (!UseReg.isVirtual())
          continue;

        LiveInterval &LI = LIS->getInterval(UseReg);
        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
        if (LI.hasSubRanges() && MO.getSubReg())
          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());

        assert(NewLiveIns[I].contains(UseReg));
        LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
        // If this register has lanes not covered by the LiveIns, be sure they
        // do not map to any subrange. ref:
        // machine-scheduler-sink-trivial-remats.mir::omitted_subrange
        if (UncoveredLanes.any()) {
          assert(LI.hasSubRanges());
          for (LiveInterval::SubRange &SR : LI.subranges())
            assert((SR.LaneMask & UncoveredLanes).none());
        }
      }
#endif
    }
    int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
    unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
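The assertion chain above encodes a simple lane-mask invariant: every lane the operand reads is either covered by the region's live-in mask or belongs to no subrange at all (and thus carries no value). A hedged restatement of that algebra as a standalone predicate (helper name hypothetical):

#include "llvm/MC/LaneBitmask.h"

// Hypothetical helper: lanes read by the operand (LM) that the region's
// live-in mask does not cover must not belong to any subrange, i.e. they
// must be vacant lanes of the use register.
static bool uncoveredLanesAreVacant(llvm::LaneBitmask LM,
                                    llvm::LaneBitmask LiveInMask,
                                    llvm::LaneBitmask AllSubrangeLanes) {
  llvm::LaneBitmask Uncovered = LM & ~LiveInMask;
  return (Uncovered & AllSubrangeLanes).none();
}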
@@ -1734,10 +1867,7 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
      MachineBasicBlock::iterator InsertPos =
          MachineBasicBlock::iterator(It.second);
      Register Reg = Def->getOperand(0).getReg();
      // Rematerialize MI to its use block.
      TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                         Def->getOperand(0).getSubReg(), *Def, *DAG.TRI);
      MachineInstr *NewMI = &*std::prev(InsertPos);
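TargetInstrInfo::reMaterialize clones *Def immediately before InsertPos with its result rewritten to Reg and the given subregister index, but the clone is not yet indexed. A minimal sketch of the LiveIntervals bookkeeping that has to follow, assuming the enclosing function's LIS pointer (the statements below are illustrative, not a quote of the elided code):

// Index the freshly inserted clone in SlotIndexes, then rebuild Reg's live
// interval so that it now starts at the rematerialized def.
LIS->InsertMachineInstrInMaps(*NewMI);
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);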
@@ -1847,9 +1977,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    // We can't remat physreg uses, unless it is a constant or an ignorable
    // use (e.g. implicit exec use on VALU instructions)
    if (MO.getReg().isPhysical()) {
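With the early bail-out on virtual-register uses deleted here, the operand filter reduces to the physical-register rules; virtual uses are instead vetted at the chosen remat point by allUsesAvailableAt(). A hedged restatement of the resulting per-operand rule (helper name hypothetical):

#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Hypothetical restatement: a virtual-register use no longer disqualifies
// the candidate at this stage; a physical-register use must still be a
// constant register or an ignorable use (e.g. the implicit exec use on VALU
// instructions).
static bool operandAllowsRemat(const llvm::MachineOperand &MO,
                               const llvm::MachineRegisterInfo &MRI,
                               const llvm::TargetInstrInfo &TII) {
  if (MO.getReg().isVirtual())
    return true;
  return MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO);
}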
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -456,6 +456,12 @@ class PreRARematStage : public GCNSchedStage {
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

  /// Returns true if all uses of \p InstToRemat defined at \p OriginalIdx are
  /// live at \p RematIdx. This only checks liveness of virtual register uses.
  bool allUsesAvailableAt(const MachineInstr *InstToRemat,
                          SlotIndex OriginalIdx, SlotIndex RematIdx) const;

public:
  bool initGCNSchedStage() override;
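For reference, the call site added in collectRematerializableInstructions() passes the def's own slot and the early register slot of its single user, so the query asks whether every input of Def still carries the same value where the clone would be inserted (CanRemat is an illustrative name):

SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
bool CanRemat = allUsesAvailableAt(Def, DefIdx, UseIdx);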
