@@ -1615,6 +1615,64 @@ void GCNSchedStage::revertScheduling() {
1615
1615
DAG.Regions [RegionIdx] = std::pair (DAG.RegionBegin , DAG.RegionEnd );
1616
1616
}
1617
1617
1618
+ bool PreRARematStage::allUsesAvailableAt (const MachineInstr *InstToRemat,
1619
+ SlotIndex OriginalIdx,
1620
+ SlotIndex RematIdx) const {
1621
+
1622
+ LiveIntervals *LIS = DAG.LIS ;
1623
+ MachineRegisterInfo &MRI = DAG.MRI ;
1624
+ OriginalIdx = OriginalIdx.getRegSlot (true );
1625
+ RematIdx = std::max (RematIdx, RematIdx.getRegSlot (true ));
1626
+ for (const MachineOperand &MO : InstToRemat->operands ()) {
1627
+ if (!MO.isReg () || !MO.getReg () || !MO.readsReg ())
1628
+ continue ;
1629
+
1630
+ if (!MO.getReg ().isVirtual ()) {
1631
+ // Do not attempt to reason about PhysRegs
1632
+ // TODO: better analysis of PhysReg livness
1633
+ if (!DAG.MRI .isConstantPhysReg (MO.getReg ()) &&
1634
+ !DAG.TII ->isIgnorableUse (MO))
1635
+ return false ;
1636
+
1637
+ // Constant PhysRegs and IgnorableUses are okay
1638
+ continue ;
1639
+ }
1640
+
1641
+ LiveInterval &LI = LIS->getInterval (MO.getReg ());
1642
+ const VNInfo *OVNI = LI.getVNInfoAt (OriginalIdx);
1643
+ assert (OVNI);
1644
+
1645
+ // Don't allow rematerialization immediately after the original def.
1646
+ // It would be incorrect if InstToRemat redefines the register.
1647
+ // See PR14098.
1648
+ if (SlotIndex::isSameInstr (OriginalIdx, RematIdx))
1649
+ return false ;
1650
+
1651
+ if (OVNI != LI.getVNInfoAt (RematIdx))
1652
+ return false ;
1653
+
1654
+ // Check that subrange is live at RematIdx.
1655
+ if (LI.hasSubRanges ()) {
1656
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo ();
1657
+ unsigned SubReg = MO.getSubReg ();
1658
+ LaneBitmask LM = SubReg ? TRI->getSubRegIndexLaneMask (SubReg)
1659
+ : MRI.getMaxLaneMaskForVReg (MO.getReg ());
1660
+ for (LiveInterval::SubRange &SR : LI.subranges ()) {
1661
+ if ((SR.LaneMask & LM).none ())
1662
+ continue ;
1663
+ if (!SR.liveAt (RematIdx))
1664
+ return false ;
1665
+
1666
+ // Early exit if all used lanes are checked. No need to continue.
1667
+ LM &= ~SR.LaneMask ;
1668
+ if (LM.none ())
1669
+ break ;
1670
+ }
1671
+ }
1672
+ }
1673
+ return true ;
1674
+ }
1675
+
1618
1676
void PreRARematStage::collectRematerializableInstructions () {
1619
1677
const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo *>(DAG.TRI );
1620
1678
for (unsigned I = 0 , E = DAG.MRI .getNumVirtRegs (); I != E; ++I) {
@@ -1636,6 +1694,47 @@ void PreRARematStage::collectRematerializableInstructions() {
1636
1694
if (Def->getParent () == UseI->getParent ())
1637
1695
continue ;
1638
1696
1697
+ bool HasRematDependency = false ;
1698
+ // Check if this instruction uses any registers that are planned to be
1699
+ // rematerialized
1700
+ for (auto &RematEntry : RematerializableInsts) {
1701
+ if (find_if (RematEntry.second ,
1702
+ [&Def](std::pair<MachineInstr *, MachineInstr *> &Remat) {
1703
+ for (MachineOperand &MO : Def->operands ()) {
1704
+ if (!MO.isReg ())
1705
+ continue ;
1706
+ if (MO.getReg () == Remat.first ->getOperand (0 ).getReg ())
1707
+ return true ;
1708
+ }
1709
+ return false ;
1710
+ }) != RematEntry.second .end ()) {
1711
+ HasRematDependency = true ;
1712
+ break ;
1713
+ }
1714
+ }
1715
+ // Do not rematerialize an instruction if it uses an instruction that we
1716
+ // have designated for rematerialization.
1717
+ // FIXME: Allow for rematerialization chains: this requires 1. updating
1718
+ // remat points to account for uses that are rematerialized, and 2. either
1719
+ // rematerializing the candidates in careful ordering, or deferring the MBB
1720
+ // RP walk until the entire chain has been rematerialized.
1721
+ if (HasRematDependency)
1722
+ continue ;
1723
+
1724
+ // Similarly, check if the UseI is planned to be remat.
1725
+ for (auto &RematEntry : RematerializableInsts) {
1726
+ if (find_if (RematEntry.second ,
1727
+ [&UseI](std::pair<MachineInstr *, MachineInstr *> &Remat) {
1728
+ return Remat.first == UseI;
1729
+ }) != RematEntry.second .end ()) {
1730
+ HasRematDependency = true ;
1731
+ break ;
1732
+ }
1733
+ }
1734
+
1735
+ if (HasRematDependency)
1736
+ break ;
1737
+
1639
1738
// We are only collecting defs that are defined in another block and are
1640
1739
// live-through or used inside regions at MinOccupancy. This means that the
1641
1740
// register must be in the live-in set for the region.
@@ -1644,8 +1743,13 @@ void PreRARematStage::collectRematerializableInstructions() {
1644
1743
auto It = DAG.LiveIns [I].find (Reg);
1645
1744
if (It != DAG.LiveIns [I].end () && !It->second .none ()) {
1646
1745
if (DAG.RegionsWithMinOcc [I]) {
1647
- RematerializableInsts[I][Def] = UseI;
1648
- AddedToRematList = true ;
1746
+ SlotIndex DefIdx = DAG.LIS ->getInstructionIndex (*Def);
1747
+ SlotIndex UseIdx =
1748
+ DAG.LIS ->getInstructionIndex (*UseI).getRegSlot (true );
1749
+ if (allUsesAvailableAt (Def, DefIdx, UseIdx)) {
1750
+ RematerializableInsts[I][Def] = UseI;
1751
+ AddedToRematList = true ;
1752
+ }
1649
1753
}
1650
1754
1651
1755
// Collect regions with rematerializable reg as live-in to avoid
@@ -1719,6 +1823,35 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1719
1823
Register DefReg = Def->getOperand (0 ).getReg ();
1720
1824
TotalSinkableRegs +=
1721
1825
SIRegisterInfo::getNumCoveredRegs (NewLiveIns[I][DefReg]);
1826
+ #ifdef EXPENSIVE_CHECKS
1827
+ // All uses are known to be available / live at the remat point. Thus, the
1828
+ // uses should already be live in to the region.
1829
+ for (MachineOperand &MO : Def->operands ()) {
1830
+ if (!MO.isReg () || !MO.getReg () || !MO.readsReg ())
1831
+ continue ;
1832
+
1833
+ Register UseReg = MO.getReg ();
1834
+ if (!UseReg.isVirtual ())
1835
+ continue ;
1836
+
1837
+ LiveInterval &LI = LIS->getInterval (UseReg);
1838
+ LaneBitmask LM = DAG.MRI .getMaxLaneMaskForVReg (MO.getReg ());
1839
+ if (LI.hasSubRanges () && MO.getSubReg ())
1840
+ LM = DAG.TRI ->getSubRegIndexLaneMask (MO.getSubReg ());
1841
+
1842
+ assert (NewLiveIns[I].contains (UseReg));
1843
+ LaneBitmask LiveInMask = NewLiveIns[I][UseReg];
1844
+ LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
1845
+ // If this register has lanes not covered by the LiveIns, be sure they
1846
+ // do not map to any subrange. ref:
1847
+ // machine-scheduler-sink-trivial-remats.mir::omitted_subrange
1848
+ if (UncoveredLanes.any ()) {
1849
+ assert (LI.hasSubRanges ());
1850
+ for (LiveInterval::SubRange &SR : LI.subranges ())
1851
+ assert ((SR.LaneMask & UncoveredLanes).none ());
1852
+ }
1853
+ }
1854
+ #endif
1722
1855
}
1723
1856
int VGPRsAfterSink = VGPRUsage - TotalSinkableRegs;
1724
1857
unsigned OptimisticOccupancy = ST.getOccupancyWithNumVGPRs (VGPRsAfterSink);
@@ -1734,10 +1867,7 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
1734
1867
MachineBasicBlock::iterator InsertPos =
1735
1868
MachineBasicBlock::iterator (It.second );
1736
1869
Register Reg = Def->getOperand (0 ).getReg ();
1737
- // Rematerialize MI to its use block. Since we are only rematerializing
1738
- // instructions that do not have any virtual reg uses, we do not need to
1739
- // call LiveRangeEdit::allUsesAvailableAt() and
1740
- // LiveRangeEdit::canRematerializeAt().
1870
+ // Rematerialize MI to its use block.
1741
1871
TII->reMaterialize (*InsertPos->getParent (), InsertPos, Reg,
1742
1872
Def->getOperand (0 ).getSubReg (), *Def, *DAG.TRI );
1743
1873
MachineInstr *NewMI = &*std::prev (InsertPos);
@@ -1847,9 +1977,6 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
1847
1977
return false ;
1848
1978
1849
1979
for (const MachineOperand &MO : MI.all_uses ()) {
1850
- if (MO.getReg ().isVirtual ())
1851
- return false ;
1852
-
1853
1980
// We can't remat physreg uses, unless it is a constant or an ignorable
1854
1981
// use (e.g. implicit exec use on VALU instructions)
1855
1982
if (MO.getReg ().isPhysical ()) {
0 commit comments