@@ -615,6 +615,138 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
   return false;
 }
 
+GCNMaxMemoryClauseSchedStrategy::GCNMaxMemoryClauseSchedStrategy(
+    const MachineSchedContext *C)
+    : GCNSchedStrategy(C) {
+  SchedStages.push_back(GCNSchedStageID::MemoryClauseInitialSchedule);
+}
+
+/// GCNMaxMemoryClauseSchedStrategy tries its best to clause memory
+/// instructions as much as possible. This is achieved by:
+///   1. Prioritize clustered operations before the stall latency heuristic.
+///   2. Prioritize long-latency loads before the stall latency heuristic.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+/// \param Zone describes the scheduled zone that we are extending, or nullptr
+///             if Cand is from a different zone than TryCand.
+/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
+bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                                   SchedCandidate &TryCand,
+                                                   SchedBoundary *Zone) const {
+  // Initialize the candidate if needed.
+  if (!Cand.isValid()) {
+    TryCand.Reason = NodeOrder;
+    return true;
+  }
+
+  // Bias PhysReg defs and copies to their uses and defs, respectively.
+  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
+                 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
+    return TryCand.Reason != NoCand;
+
+  if (DAG->isTrackingPressure()) {
+    // Avoid exceeding the target's limit.
+    if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
+                    RegExcess, TRI, DAG->MF))
+      return TryCand.Reason != NoCand;
+
+    // Avoid increasing the max critical pressure in the scheduled region.
+    if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
+                    TryCand, Cand, RegCritical, TRI, DAG->MF))
+      return TryCand.Reason != NoCand;
+  }
+
+  // MaxMemoryClause-specific: we prioritize clustered instructions, as we
+  // would get more benefit from clausing these memory instructions.
+  const SUnit *CandNextClusterSU =
+      Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  const SUnit *TryCandNextClusterSU =
+      TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
+  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
+                 Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster))
+    return TryCand.Reason != NoCand;
+
+  // We only compare a subset of features when comparing nodes between the
+  // Top and Bottom boundary. Some properties are simply incomparable; in many
+  // other instances we should only override the other boundary if something
+  // is a clear good pick on one boundary. Skip heuristics that are more
+  // "tie-breaking" in nature.
+  bool SameBoundary = Zone != nullptr;
+  if (SameBoundary) {
+    // For loops that are acyclic path limited, aggressively schedule for
+    // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+    // heuristics to take precedence.
+    if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
+        tryLatency(TryCand, Cand, *Zone))
+      return TryCand.Reason != NoCand;
+
+    // MaxMemoryClause-specific: prioritize long-latency memory load
+    // instructions in top-bottom order to hide more latency. The mayLoad
+    // check is used to exclude store-like instructions, which we do not
+    // want to schedule too early.
+    bool TryMayLoad =
+        TryCand.SU->isInstr() && TryCand.SU->getInstr()->mayLoad();
+    bool CandMayLoad = Cand.SU->isInstr() && Cand.SU->getInstr()->mayLoad();
+
+    if (TryMayLoad || CandMayLoad) {
+      bool TryLongLatency =
+          TryCand.SU->Latency > 10 * Cand.SU->Latency && TryMayLoad;
+      bool CandLongLatency =
+          10 * TryCand.SU->Latency < Cand.SU->Latency && CandMayLoad;
+
+      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
+                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
+                     Cand, Stall))
+        return TryCand.Reason != NoCand;
+    }
+    // Prioritize instructions that read unbuffered resources by stall cycles.
+    if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
+                Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
+      return TryCand.Reason != NoCand;
+  }
+
+  if (SameBoundary) {
+    // Weak edges are for clustering and other constraints.
+    if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
+                getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak))
+      return TryCand.Reason != NoCand;
+  }
+
+  // Avoid increasing the max pressure of the entire region.
+  if (DAG->isTrackingPressure() &&
+      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
+                  Cand, RegMax, TRI, DAG->MF))
+    return TryCand.Reason != NoCand;
+
+  if (SameBoundary) {
+    // Avoid critical resource consumption and balance the schedule.
+    TryCand.initResourceDelta(DAG, SchedModel);
+    if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
+                TryCand, Cand, ResourceReduce))
+      return TryCand.Reason != NoCand;
+    if (tryGreater(TryCand.ResDelta.DemandedResources,
+                   Cand.ResDelta.DemandedResources, TryCand, Cand,
+                   ResourceDemand))
+      return TryCand.Reason != NoCand;
+
+    // Avoid serializing long latency dependence chains.
+    // For acyclic path limited loops, latency was already checked above.
+    if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
+        !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
+      return TryCand.Reason != NoCand;
+
+    // Fall through to original instruction order.
+    if (Zone->isTop() == (TryCand.SU->NodeNum < Cand.SU->NodeNum)) {
+      assert(TryCand.SU->NodeNum != Cand.SU->NodeNum);
+      TryCand.Reason = NodeOrder;
+      return true;
+    }
+  }
+
+  return false;
+}
+
 
 GCNScheduleDAGMILive::GCNScheduleDAGMILive(
     MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
     : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
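
The 10x gate in the long-latency check above is easy to misread: a candidate
only counts as long-latency if it is a load AND its latency exceeds ten times
the competing candidate's latency, and the two operands of tryGreater are
swapped for bottom-up zones. Below is a minimal standalone sketch of just that
predicate; the names (FakeCand, prefersTryCand) are hypothetical stand-ins and
not part of the patch, which operates on SUnit/SchedCandidate and LLVM's
tryGreater.

    #include <cstdio>

    // Hypothetical stand-ins for the SUnit/SchedCandidate fields the patch reads.
    struct FakeCand {
      unsigned Latency; // SU->Latency
      bool MayLoad;     // SU->getInstr()->mayLoad()
    };

    // Mirrors the gating in GCNMaxMemoryClauseSchedStrategy::tryCandidate: a
    // candidate is "long latency" only if it is a load and its latency is more
    // than 10x the other candidate's latency.
    static bool prefersTryCand(const FakeCand &Try, const FakeCand &Cand,
                               bool ZoneIsTop) {
      bool TryLongLatency = Try.Latency > 10 * Cand.Latency && Try.MayLoad;
      bool CandLongLatency = 10 * Try.Latency < Cand.Latency && Cand.MayLoad;
      // For booleans, tryGreater(A, B, ...) picks TryCand exactly when A && !B;
      // the operands are swapped for bottom-up scheduling, matching the
      // Zone->isTop() selects in the patch.
      bool A = ZoneIsTop ? TryLongLatency : CandLongLatency;
      bool B = ZoneIsTop ? CandLongLatency : TryLongLatency;
      return A && !B;
    }

    int main() {
      FakeCand Load{200, true}, ALU{4, false};
      // Top-down: the 200-cycle load beats the 4-cycle ALU op (200 > 10 * 4).
      std::printf("top-down load first: %d\n", prefersTryCand(Load, ALU, true));
      // A 30-cycle load does not clear the 10x bar against the same ALU op.
      FakeCand ShortLoad{30, true};
      std::printf("top-down short load: %d\n",
                  prefersTryCand(ShortLoad, ALU, true));
    }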
@@ -644,6 +776,9 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
     return std::make_unique<PreRARematStage>(SchedStageID, *this);
   case GCNSchedStageID::ILPInitialSchedule:
     return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
+  case GCNSchedStageID::MemoryClauseInitialSchedule:
+    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
+                                                              *this);
   }
 
   llvm_unreachable("Unknown SchedStageID.");
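
This factory case is the only wiring visible in the excerpt. Assuming the
patch also plumbs the new strategy name through the existing
amdgpu-sched-strategy override the way max-ilp is handled (that hunk is not
shown here), the strategy would be exercised with something like:

    llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-sched-strategy=max-memory-clause input.ll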
@@ -869,6 +1004,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
   case GCNSchedStageID::ILPInitialSchedule:
     OS << "Max ILP Initial Schedule";
     break;
+  case GCNSchedStageID::MemoryClauseInitialSchedule:
+    OS << "Max memory clause Initial Schedule";
+    break;
   }
 
   return OS;
@@ -1088,7 +1226,8 @@ void GCNSchedStage::setupNewBlock() {
   // Get the real RP for the region if it hasn't been calculated before. After
   // the initial schedule stage, real RP will be collected after scheduling.
   if (StageID == GCNSchedStageID::OccInitialSchedule ||
-      StageID == GCNSchedStageID::ILPInitialSchedule)
+      StageID == GCNSchedStageID::ILPInitialSchedule ||
+      StageID == GCNSchedStageID::MemoryClauseInitialSchedule)
     DAG.computeBlockPressure(RegionIdx, CurrentMBB);
 }
 
@@ -1389,6 +1528,11 @@ bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
   return false;
 }
 
+bool MemoryClauseInitialScheduleStage::shouldRevertScheduling(
+    unsigned WavesAfter) {
+  return mayCauseSpilling(WavesAfter);
+}
+
 bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
   if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
       !PressureAfter.less(MF, PressureBefore)) {
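
The new stage's revert policy forwards straight to mayCauseSpilling, whose
body is cut off in this excerpt just after its first condition. A standalone
sketch of the decision as far as it is visible, with hypothetical stand-ins
for the wave and pressure queries (only the clause shown above is reproduced;
the real function does more before deciding):

    #include <cstdio>

    // Hypothetical snapshot of the quantities GCNSchedStage::mayCauseSpilling
    // consults; the real code reads them from MFI and the pressure trackers.
    struct RegionState {
      unsigned WavesAfter;    // occupancy after this stage's schedule
      unsigned MinWavesPerEU; // MFI.getMinWavesPerEU()
      bool HasExcessRP;       // isRegionWithExcessRP()
      bool PressureImproved;  // PressureAfter.less(MF, PressureBefore)
    };

    // Visible first clause of mayCauseSpilling: at or below the minimum wave
    // count, in a region already over the register budget, with no pressure
    // improvement from the reschedule.
    static bool mayCauseSpilling(const RegionState &R) {
      return R.WavesAfter <= R.MinWavesPerEU && R.HasExcessRP &&
             !R.PressureImproved;
    }

    // MemoryClauseInitialScheduleStage::shouldRevertScheduling just forwards.
    static bool shouldRevert(const RegionState &R) {
      return mayCauseSpilling(R);
    }

    int main() {
      RegionState Risky{2, 2, true, false}; // likely spill -> revert
      RegionState Fine{4, 2, true, false};  // enough waves -> keep schedule
      std::printf("revert risky: %d\n", shouldRevert(Risky));
      std::printf("revert fine:  %d\n", shouldRevert(Fine));
    }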