[AMDGPU] Allow rematerialization of instructions with virtual register uses #124327

Merged · 5 commits · Feb 6, 2025
Changes from 1 commit
88 changes: 83 additions & 5 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1615,6 +1615,61 @@ void GCNSchedStage::revertScheduling() {
   DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd);
 }
 
+bool PreRARematStage::allUsesAvailableAt(const MachineInstr *InstToRemat,
+                                         SlotIndex OriginalIdx,
+                                         SlotIndex RematIdx) const {
+
+  LiveIntervals *LIS = DAG.LIS;
+  MachineRegisterInfo &MRI = DAG.MRI;
+  OriginalIdx = OriginalIdx.getRegSlot(true);
+  RematIdx = std::max(RematIdx, RematIdx.getRegSlot(true));
+  for (const MachineOperand &MO : InstToRemat->operands()) {
+    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+      continue;
+
+    // Do not attempt to reason about PhysRegs
+    if (!MO.getReg().isVirtual()) {
+      assert(DAG.MRI.isConstantPhysReg(MO.getReg()) ||
+             DAG.TII->isIgnorableUse(MO));
+      continue;
+    }
+
+    LiveInterval &LI = LIS->getInterval(MO.getReg());
+    const VNInfo *OVNI = LI.getVNInfoAt(OriginalIdx);
+    assert(OVNI);
+
+    // Don't allow rematerialization immediately after the original def.
+    // It would be incorrect if InstToRemat redefines the register.
+    // See PR14098.
+    if (SlotIndex::isSameInstr(OriginalIdx, RematIdx))
+      return false;
+
+    if (OVNI != LI.getVNInfoAt(RematIdx))
+      return false;
+
+    // Check that subrange is live at RematIdx.
+    if (LI.hasSubRanges()) {
+      const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+      unsigned SubReg = MO.getSubReg();
+      LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask(SubReg)
+                              : MRI.getMaxLaneMaskForVReg(MO.getReg());
+      for (LiveInterval::SubRange &SR : LI.subranges()) {
+        if ((SR.LaneMask & LM).none())
+          continue;
+        if (!SR.liveAt(RematIdx))
+          return false;
+
+        // Early exit if all used lanes are checked. No need to continue.
+        LM &= ~SR.LaneMask;
+        if (LM.none())
+          break;
+      }
+      assert(LM.none());
+    }
+  }
+  return true;
+}
+
 void PreRARematStage::collectRematerializableInstructions() {
   const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(DAG.TRI);
   for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
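Note: the new allUsesAvailableAt closely follows LiveRangeEdit's check of the same name: a candidate may only be rematerialized if every value it reads is still available at the target point, down to individual lanes for subregister uses. Below is a minimal standalone sketch of the lane-coverage loop above, assuming plain uint64_t masks in place of LaneBitmask and a hypothetical Subrange stub in place of LiveInterval::SubRange:

#include <cassert>
#include <cstdint>
#include <vector>

struct Subrange {
  uint64_t LaneMask;  // lanes tracked by this subrange
  bool LiveAtRemat;   // stand-in for SR.liveAt(RematIdx)
};

// Every lane the operand reads must be covered by a subrange that is live at
// the rematerialization point; verified lanes are cleared from LM so the loop
// can stop early once nothing is left to check.
bool usedLanesLiveAtRemat(uint64_t UsedLanes,
                          const std::vector<Subrange> &Subranges) {
  uint64_t LM = UsedLanes;
  for (const Subrange &SR : Subranges) {
    if ((SR.LaneMask & LM) == 0)
      continue;            // tracks none of the remaining used lanes
    if (!SR.LiveAtRemat)
      return false;        // a used lane is dead at the remat point
    LM &= ~SR.LaneMask;    // these lanes are now verified
    if (LM == 0)
      break;               // all used lanes checked
  }
  assert(LM == 0 && "subranges must cover every used lane");
  return true;
}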
@@ -1644,8 +1699,13 @@ void PreRARematStage::collectRematerializableInstructions()
       auto It = DAG.LiveIns[I].find(Reg);
       if (It != DAG.LiveIns[I].end() && !It->second.none()) {
         if (DAG.RegionsWithMinOcc[I]) {
-          RematerializableInsts[I][Def] = UseI;
-          AddedToRematList = true;
+          SlotIndex DefIdx = DAG.LIS->getInstructionIndex(*Def);
+          SlotIndex UseIdx =
+              DAG.LIS->getInstructionIndex(*UseI).getRegSlot(true);
+          if (allUsesAvailableAt(Def, DefIdx, UseIdx)) {
+            RematerializableInsts[I][Def] = UseI;
+            AddedToRematList = true;
+          }
         }
 
         // Collect regions with rematerializable reg as live-in to avoid
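The rematerialization point passed in here is the use's early register slot (getRegSlot(true) on the use's SlotIndex), i.e. the point at which the moved instruction's operands would be read, just before UseI itself executes. A toy model of slot ordering (hypothetical ToySlotIndex, not LLVM's API) illustrates why an operand killed by UseI still counts as live at that point:

#include <cstdio>

// Toy sub-slot ordering within one instruction, mirroring LLVM's
// Slot_Block < Slot_EarlyClobber < Slot_Register < Slot_Dead.
enum Slot { Block = 0, EarlyClobber = 1, Register = 2, Dead = 3 };

struct ToySlotIndex {
  int Instr;  // instruction number
  Slot S;     // position within that instruction
  bool operator<(const ToySlotIndex &O) const {
    return Instr != O.Instr ? Instr < O.Instr : S < O.S;
  }
};

int main() {
  ToySlotIndex UseKill{10, Register};         // operand killed at UseI
  ToySlotIndex RematPoint{10, EarlyClobber};  // getRegSlot(true) analogue
  // The remat point precedes the kill, so the operand is still live there.
  std::printf("still live at remat point: %s\n",
              RematPoint < UseKill ? "yes" : "no");
  return 0;
}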
@@ -1719,6 +1779,27 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
       Register DefReg = Def->getOperand(0).getReg();
       TotalSinkableRegs +=
           SIRegisterInfo::getNumCoveredRegs(NewLiveIns[I][DefReg]);
+#ifdef EXPENSIVE_CHECKS
+      // All uses are known to be available / live at the remat point. Thus,
+      // the uses should already be live in to the region.
+      for (MachineOperand &MO : Def->operands()) {
+        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
+          continue;
+
+        Register UseReg = MO.getReg();
+        if (!UseReg.isVirtual())
+          continue;
+
+        LiveInterval &LI = LIS->getInterval(UseReg);
+        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
+        if (LI.hasSubRanges() && MO.getSubReg())
+          LM = DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
+
+        assert(NewLiveIns[I].contains(UseReg));
+        LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
+        assert((LiveInMask & LM) == LM);
+      }
+#endif
     }
     int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
     unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs(VGPRsAfterSink);
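The final assertion, (LiveInMask & LM) == LM, is a lane-mask subset test: every lane the rematerialized instruction reads must already be contained in the region's live-in mask. A minimal standalone analogue, with uint64_t standing in for LaneBitmask:

#include <cassert>
#include <cstdint>

// (Sub & Super) == Sub holds exactly when every bit of Sub is set in Super.
bool isLaneSubset(uint64_t Sub, uint64_t Super) {
  return (Sub & Super) == Sub;
}

int main() {
  uint64_t LiveInMask = 0b1111;  // lanes live in to the region
  uint64_t UsedLanes = 0b0011;   // lanes the candidate's operand reads
  assert(isLaneSubset(UsedLanes, LiveInMask) && "use must already be live-in");
  return 0;
}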
@@ -1847,9 +1928,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
     return false;
 
   for (const MachineOperand &MO : MI.all_uses()) {
-    if (MO.getReg().isVirtual())
-      return false;
-
     // We can't remat physreg uses, unless it is a constant or an ignorable
     // use (e.g. implicit exec use on VALU instructions)
     if (MO.getReg().isPhysical()) {
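This deletion is the core behavioral change of the patch: a virtual register use no longer disqualifies an instruction from rematerialization outright; instead, such uses are permitted whenever allUsesAvailableAt can prove they remain live at the remat point. A schematic before/after contrast (hypothetical helpers, not code from the patch):

struct UseInfo {
  bool IsVirtual;         // virtual register operand?
  bool LiveAtRematPoint;  // result of the new liveness check
};

// Before: any virtual register use rejects the candidate.
bool oldAllowsUse(const UseInfo &U) { return !U.IsVirtual; }

// After: virtual uses are fine when provably live at the remat point.
bool newAllowsUse(const UseInfo &U) {
  return !U.IsVirtual || U.LiveAtRematPoint;
}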
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -456,6 +456,12 @@ class PreRARematStage : public GCNSchedStage {
   bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                                const TargetInstrInfo *TII);
 
+  /// Returns true if all the uses of \p InstToRemat at its original position
+  /// (\p OriginalIdx) are live at \p RematIdx. This only checks liveness of
+  /// virtual reg uses.
+  bool allUsesAvailableAt(const MachineInstr *InstToRemat,
+                          SlotIndex OriginalIdx, SlotIndex RematIdx) const;
+
 public:
   bool initGCNSchedStage() override;
 