llvm · dtcxzyw · Jun 13, 2024 · Mar 8, 2024 · Mar 12, 2024 · Apr 7, 2024
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -44,10 +44,12 @@
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/WindowScheduler.h"
 #include "llvm/InitializePasses.h"
 
 #include <deque>
@@ -107,6 +109,9 @@ class MachinePipeliner : public MachineFunctionPass {
   bool scheduleLoop(MachineLoop &L);
   bool swingModuloScheduler(MachineLoop &L);
   void setPragmaPipelineOptions(MachineLoop &L);
+  bool runWindowScheduler(MachineLoop &L);
+  bool useSwingModuloScheduler();
+  bool useWindowScheduler(bool Changed);
 };
 
 /// This class builds the dependence graph for the instructions in a loop,
@@ -449,7 +454,7 @@ class ResourceManager {
   const MCSchedModel &SM;
   const TargetSubtargetInfo *ST;
   const TargetInstrInfo *TII;
-  SwingSchedulerDAG *DAG;
+  ScheduleDAGInstrs *DAG;
   const bool UseDFA;
   /// DFA resources for each slot
   llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources;
@@ -493,7 +498,7 @@ class ResourceManager {
 #endif
 
 public:
-  ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG)
+  ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
       : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
         DAG(DAG), UseDFA(ST->useDFAforSMS()),
         ProcResourceMasks(SM.getNumProcResourceKinds(), 0),

diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -199,6 +199,9 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
   /// True if the subtarget should run MachinePipeliner
   virtual bool enableMachinePipeliner() const { return true; };
 
+  /// True if the subtarget should run WindowScheduler (enabled by default).
+  virtual bool enableWindowScheduler() const { return true; }
+
   /// True if the subtarget should enable joining global copies.
   ///
   /// By default this is enabled if the machine scheduler is enabled, but

diff --git a/llvm/include/llvm/CodeGen/WindowScheduler.h b/llvm/include/llvm/CodeGen/WindowScheduler.h
@@ -0,0 +1,171 @@
+//======----------- WindowScheduler.h - window scheduler ---------------======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Window Scheduling software pipelining algorithm.
+//
+// The concept of the window algorithm was first unveiled in Steven Muchnick's
+// book, "Advanced Compiler Design And Implementation", and later elaborated
+// upon in Venkatraman Govindaraju's report, "Implementation of Software
+// Pipelining Using Window Scheduling".
+//
+// The window algorithm can be perceived as a modulo scheduling algorithm with a
+// stage count of 2. It boasts a higher scheduling success rate in targets with
+// severe resource conflicts when compared to the classic Swing Modulo
+// Scheduling (SMS) algorithm. To align with the LLVM scheduling framework, we
+// have enhanced the original window algorithm. The primary steps are as
+// follows:
+//
+// 1. Instead of duplicating the original MBB twice as mentioned in the
+// literature, we copy it three times, generating TripleMBB and the
+// corresponding TripleDAG.
+//
+// 2. We establish a scheduling window on TripleMBB and execute list scheduling
+// within it.
+//
+// 3. After multiple rounds of list scheduling, we select the best outcome and
+// expand it into the final scheduling result.
+//
+// To cater to the needs of various targets, we have developed the window
+// scheduler in a form that is easily derivable. We recommend employing this
+// algorithm in targets with severe resource conflicts, and it can be utilized
+// either before or after the Register Allocator (RA).
+//
+// The default implementation provided here is before RA. If it is to be used
+// after RA, certain critical algorithm functions will need to be derived.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_WINDOWSCHEDULER_H
+#define LLVM_CODEGEN_WINDOWSCHEDULER_H
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+
+namespace llvm {
+
+enum WindowSchedulingFlag {
+  WS_Off,  ///< Turn off window algorithm.
+  WS_On,   ///< Use window algorithm after SMS algorithm fails.
+  WS_Force ///< Use window algorithm instead of SMS algorithm.
+};
+
+/// The main class in the implementation of the target independent window
+/// scheduler.
+class WindowScheduler {
+protected:
+  MachineSchedContext *Context = nullptr;
+  MachineFunction *MF = nullptr;
+  MachineBasicBlock *MBB = nullptr;
+  MachineLoop &Loop;
+  const TargetSubtargetInfo *Subtarget = nullptr;
+  const TargetInstrInfo *TII = nullptr;
+  const TargetRegisterInfo *TRI = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+
+  /// To identify the dependencies between MIs across two trips, we construct
+  /// a DAG for a new MBB, which is created by copying the original MBB three
+  /// times. We refer to this new MBB as 'TripleMBB' and the corresponding DAG
+  /// as 'TripleDAG'.
+  /// If the dependencies span more than two trips, we avoid applying the
+  /// window algorithm by identifying successive phis in the old MBB.
+  std::unique_ptr<ScheduleDAGInstrs> TripleDAG;
+  /// OriMIs keeps the MIs removed from the original MBB.
+  SmallVector<MachineInstr *> OriMIs;
+  /// TriMIs keeps the MIs of TripleMBB, which is used to restore TripleMBB.
+  SmallVector<MachineInstr *> TriMIs;
+  /// TriToOri keeps the mappings between the MI clones in TripleMBB and their
+  /// original MI.
+  DenseMap<MachineInstr *, MachineInstr *> TriToOri;
+  /// OriToCycle keeps the mappings between the original MI and its issue cycle.
+  DenseMap<MachineInstr *, int> OriToCycle;
+  /// SchedResult keeps the result of each list scheduling, and the format of
+  /// the tuple is <MI pointer, Cycle, Stage, Order ID>.
+  SmallVector<std::tuple<MachineInstr *, int, int, int>, 256> SchedResult;
+  /// SchedPhiNum records the number of phis in the original MBB, and the
+  /// scheduling starts with the MI after the phis.
+  unsigned SchedPhiNum = 0;
+  /// SchedInstrNum records the number of MIs involved in scheduling in the
+  /// original MBB, excluding debug instructions.
+  unsigned SchedInstrNum = 0;
+  /// BestII and BestOffset record the characteristics of the best scheduling
+  /// result and are used together with SchedResult as the final window
+  /// scheduling result.
+  unsigned BestII = UINT_MAX;
+  unsigned BestOffset = 0;
+  /// BaseII is the II obtained when the window offset is SchedPhiNum. This
+  /// offset is the initial position of the sliding window.
+  unsigned BaseII = 0;
+
+public:
+  WindowScheduler(MachineSchedContext *C, MachineLoop &ML);
+  virtual ~WindowScheduler() {}
+
+  bool run();
+
+protected:
+  /// Two types of ScheduleDAGs are needed, one for creating dependency graphs
+  /// only, and the other for list scheduling as determined by the target.
+  virtual ScheduleDAGInstrs *
+  createMachineScheduler(bool OnlyBuildGraph = false);
+  /// Initialize the algorithm and determine if it can be executed.
+  virtual bool initialize();
+  /// Add some related processing before running window scheduling.
+  virtual void preProcess();
+  /// Add some related processing after running window scheduling.
+  virtual void postProcess();
+  /// Back up the MIs in the original MBB and remove them from MBB.
+  void backupMBB();
+  /// Erase the MIs in current MBB and restore the original MIs.
+  void restoreMBB();
+  /// Make three copies of the original MBB to generate a new TripleMBB.
+  virtual void generateTripleMBB();
+  /// Restore the order of MIs in TripleMBB after each list scheduling.
+  virtual void restoreTripleMBB();
+  /// Give the folding position in the window algorithm, where different
+  /// heuristics can be used. It determines the performance and compilation time
+  /// of the algorithm.
+  virtual SmallVector<unsigned> getSearchIndexes(unsigned SearchNum,
+                                                 unsigned SearchRatio);
+  /// Calculate the execution cycle of MIs after list scheduling.
+  virtual int calculateMaxCycle(ScheduleDAGInstrs &DAG, unsigned Offset);
+  /// Calculate the stall cycle between two trips after list scheduling.
+  virtual int calculateStallCycle(unsigned Offset, int MaxCycle);
+  /// Analyze the II value after each list scheduling.
+  virtual unsigned analyseII(ScheduleDAGInstrs &DAG, unsigned Offset);
+  /// Phis are scheduled separately after each list scheduling.
+  virtual void schedulePhi(int Offset, unsigned &II);
+  /// Get the final issue order of all scheduled MIs including phis.
+  DenseMap<MachineInstr *, int> getIssueOrder(unsigned Offset, unsigned II);
+  /// Update the scheduling result after each list scheduling.
+  virtual void updateScheduleResult(unsigned Offset, unsigned II);
+  /// Check whether the final result of window scheduling is valid.
+  virtual bool isScheduleValid() { return BestOffset != SchedPhiNum; }
+  /// Using the scheduling infrastructure to expand the results of window
+  /// scheduling. It is usually necessary to add prologue and epilogue MBBs.
+  virtual void expand();
+  /// Update the live intervals for all registers used within MBB.
+  virtual void updateLiveIntervals();
+  /// Estimate an II value at which all MIs will be scheduled successfully.
+  int getEstimatedII(ScheduleDAGInstrs &DAG);
+  /// Get the iterator range of MIs in the scheduling window.
+  iterator_range<MachineBasicBlock::iterator> getScheduleRange(unsigned Offset,
+                                                               unsigned Num);
+  /// Get the issue cycle of the new MI based on the cycle of the original MI.
+  int getOriCycle(MachineInstr *NewMI);
+  /// Get the original MI from which the new MI is cloned.
+  MachineInstr *getOriMI(MachineInstr *NewMI);
+  /// Get the scheduling stage, where the stage of the new MI is identical to
+  /// the original MI.
+  unsigned getOriStage(MachineInstr *OriMI, unsigned Offset);
+  /// Get the register in the phi which is generated from the current MBB.
+  Register getAntiRegister(MachineInstr *Phi);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
@@ -244,6 +244,7 @@ add_llvm_component_library(LLVMCodeGen
   VLIWMachineScheduler.cpp
   VirtRegMap.cpp
   WasmEHPrepare.cpp
+  WindowScheduler.cpp
   WinEHPrepare.cpp
   XRayInstrumentation.cpp
   ${GeneratedMLSources}

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -68,6 +68,7 @@
 #include "llvm/CodeGen/ScheduleDAGMutation.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Config/llvm-config.h"
@@ -206,6 +207,17 @@ cl::opt<int> SwpForceIssueWidth(
     cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
     cl::init(-1));
 
+/// Command line option selecting how window scheduling is used (default: on).
+cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
+    "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
+    cl::desc("Set how to use window scheduling algorithm."),
+    cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off",
+                          "Turn off window algorithm."),
+               clEnumValN(WindowSchedulingFlag::WS_On, "on",
+                          "Use window algorithm after SMS algorithm fails."),
+               clEnumValN(WindowSchedulingFlag::WS_Force, "force",
+                          "Use window algorithm instead of SMS algorithm.")));
+
 } // end namespace llvm
 
 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
@@ -292,8 +304,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
   }
 
   ++NumTrytoPipeline;
+  if (useSwingModuloScheduler())
+    Changed = swingModuloScheduler(L);
 
-  Changed = swingModuloScheduler(L);
+  if (useWindowScheduler(Changed))
+    Changed = runWindowScheduler(L);
 
   LI.LoopPipelinerInfo.reset();
   return Changed;
@@ -484,9 +499,35 @@ void MachinePipeliner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineDominatorTree>();
   AU.addRequired<LiveIntervals>();
   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+  AU.addRequired<TargetPassConfig>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
+bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
+  // Fill in the scheduling context, then hand the loop to WindowScheduler.
+  MachineSchedContext Context;
+  Context.MF = MF;
+  Context.MDT = MDT;
+  Context.MLI = MLI;
+  Context.LIS = &getAnalysis<LiveIntervals>();
+  Context.AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  Context.PassConfig = &getAnalysis<TargetPassConfig>();
+  Context.RegClassInfo->runOnMachineFunction(*MF);
+  return WindowScheduler(&Context, L).run();
+}
+
+bool MachinePipeliner::useSwingModuloScheduler() {
+  // SwingModuloScheduler is skipped only when WindowScheduler is forced.
+  return WindowSchedulingOption != WindowSchedulingFlag::WS_Force;
+}
+
+bool MachinePipeliner::useWindowScheduler(bool Changed) {
+  // Forced mode always runs it; "on" mode runs it only if Changed is false.
+  if (WindowSchedulingOption == WindowSchedulingFlag::WS_Force)
+    return true;
+  return WindowSchedulingOption == WindowSchedulingFlag::WS_On && !Changed;
+}
+
 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
   if (SwpForceII > 0)
     MII = SwpForceII;