Skip to content

Commit 0c5319e

Browse files
authored
[ModuloSchedule][AArch64] Implement modulo variable expansion for pipelining (#65609)
Modulo variable expansion (MVE) is a technique that resolves overlapping variable lifetimes by unrolling. For processors with ordinary registers, such as Arm and x86, the existing implementation resolves the overlap by copying values with move instructions. This method may result in a very large number of move instructions, which can cause performance problems. Modulo variable expansion is enabled by specifying -pipeliner-mve-cg. A backend must implement some newly defined interfaces in PipelinerLoopInfo; this commit implements them for AArch64. Discourse thread: https://discourse.llvm.org/t/implementing-modulo-variable-expansion-for-machinepipeliner
1 parent 46c05df commit 0c5319e

21 files changed

+2857
-51
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,78 @@ class PeelingModuloScheduleExpander {
370370
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
371371
};
372372

373+
/// Expand the kernel using the modulo variable expansion (MVE) algorithm.
374+
/// It unrolls the kernel enough times to avoid overlapping register lifetimes.
375+
class ModuloScheduleExpanderMVE {
376+
private:
377+
using ValueMapTy = DenseMap<unsigned, unsigned>;
378+
using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
379+
using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
380+
381+
ModuloSchedule &Schedule;
382+
MachineFunction &MF;
383+
const TargetSubtargetInfo &ST;
384+
MachineRegisterInfo &MRI;
385+
const TargetInstrInfo *TII = nullptr;
386+
LiveIntervals &LIS;
387+
388+
MachineBasicBlock *OrigKernel = nullptr;
389+
MachineBasicBlock *OrigPreheader = nullptr;
390+
MachineBasicBlock *OrigExit = nullptr;
391+
MachineBasicBlock *Check = nullptr;
392+
MachineBasicBlock *Prolog = nullptr;
393+
MachineBasicBlock *NewKernel = nullptr;
394+
MachineBasicBlock *Epilog = nullptr;
395+
MachineBasicBlock *NewPreheader = nullptr;
396+
MachineBasicBlock *NewExit = nullptr;
397+
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
398+
399+
/// The unroll count required to avoid overlap of live ranges.
400+
/// NumUnroll = 1 means no unrolling.
401+
int NumUnroll;
402+
403+
void calcNumUnroll();
404+
void generatePipelinedLoop();
405+
void generateProlog(SmallVectorImpl<ValueMapTy> &VRMap);
406+
void generatePhi(MachineInstr *OrigMI, int UnrollNum,
407+
SmallVectorImpl<ValueMapTy> &PrologVRMap,
408+
SmallVectorImpl<ValueMapTy> &KernelVRMap,
409+
SmallVectorImpl<ValueMapTy> &PhiVRMap);
410+
void generateKernel(SmallVectorImpl<ValueMapTy> &PrologVRMap,
411+
SmallVectorImpl<ValueMapTy> &KernelVRMap,
412+
InstrMapTy &LastStage0Insts);
413+
void generateEpilog(SmallVectorImpl<ValueMapTy> &KernelVRMap,
414+
SmallVectorImpl<ValueMapTy> &EpilogVRMap,
415+
InstrMapTy &LastStage0Insts);
416+
void mergeRegUsesAfterPipeline(Register OrigReg, Register NewReg);
417+
418+
MachineInstr *cloneInstr(MachineInstr *OldMI);
419+
420+
void updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef);
421+
422+
void generateKernelPhi(Register OrigLoopVal, Register NewLoopVal,
423+
unsigned UnrollNum,
424+
SmallVectorImpl<ValueMapTy> &VRMapProlog,
425+
SmallVectorImpl<ValueMapTy> &VRMapPhi);
426+
void updateInstrUse(MachineInstr *MI, int StageNum, int PhaseNum,
427+
SmallVectorImpl<ValueMapTy> &CurVRMap,
428+
SmallVectorImpl<ValueMapTy> *PrevVRMap);
429+
430+
void insertCondBranch(MachineBasicBlock &MBB, int RequiredTC,
431+
InstrMapTy &LastStage0Insts,
432+
MachineBasicBlock &GreaterThan,
433+
MachineBasicBlock &Otherwise);
434+
435+
public:
436+
ModuloScheduleExpanderMVE(MachineFunction &MF, ModuloSchedule &S,
437+
LiveIntervals &LIS)
438+
: Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
439+
TII(ST.getInstrInfo()), LIS(LIS) {}
440+
441+
void expand();
442+
static bool canApply(MachineLoop &L);
443+
};
444+
373445
/// Expander that simply annotates each scheduled instruction with a post-instr
374446
/// symbol that can be consumed by the ModuloScheduleTest pass.
375447
///

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,26 @@ class TargetInstrInfo : public MCInstrInfo {
767767
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
768768
SmallVectorImpl<MachineOperand> &Cond) = 0;
769769

770+
/// Create a condition to determine if the remaining trip count for a phase
771+
/// is greater than TC. Some instructions such as comparisons may be
772+
/// inserted at the bottom of MBB. All instructions expanded for the
773+
/// phase must be inserted in MBB before calling this function.
774+
/// LastStage0Insts is the map from the original instructions scheduled at
775+
/// stage#0 to the expanded instructions for the last iteration of the
776+
/// kernel. LastStage0Insts is intended to obtain the instruction that
777+
/// refers to the latest loop counter value.
778+
///
779+
/// MBB can also be a predecessor of the prologue block. Then
780+
/// LastStage0Insts must be empty and the compared value is the initial
781+
/// value of the trip count.
782+
virtual void createRemainingIterationsGreaterCondition(
783+
int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
784+
DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
785+
llvm_unreachable(
786+
"Target didn't implement "
787+
"PipelinerLoopInfo::createRemainingIterationsGreaterCondition!");
788+
}
789+
770790
/// Modify the loop such that the trip count is
771791
/// OriginalTC + TripCountAdjust.
772792
virtual void adjustTripCount(int TripCountAdjust) = 0;
@@ -780,6 +800,10 @@ class TargetInstrInfo : public MCInstrInfo {
780800
/// Once this function is called, no other functions on this object are
781801
/// valid; the loop has been removed.
782802
virtual void disposed() = 0;
803+
804+
/// Return true if the target can expand a pipelined schedule with modulo
805+
/// variable expansion.
806+
virtual bool isMVEExpanderSupported() { return false; }
783807
};
784808

785809
/// Analyze loop L, which must be a single-basic-block loop, and if the

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ static cl::opt<int>
192192
cl::desc("Margin representing the unused percentage of "
193193
"the register pressure limit"));
194194

195+
static cl::opt<bool>
196+
MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
197+
cl::desc("Use the MVE code generator for software pipelining"));
198+
195199
namespace llvm {
196200

197201
// A command line option to enable the CopyToPhi DAG mutation.
@@ -677,6 +681,11 @@ void SwingSchedulerDAG::schedule() {
677681
if (ExperimentalCodeGen && NewInstrChanges.empty()) {
678682
PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
679683
MSE.expand();
684+
} else if (MVECodeGen && NewInstrChanges.empty() &&
685+
LoopPipelinerInfo->isMVEExpanderSupported() &&
686+
ModuloScheduleExpanderMVE::canApply(Loop)) {
687+
ModuloScheduleExpanderMVE MSE(MF, MS, LIS);
688+
MSE.expand();
680689
} else {
681690
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
682691
MSE.expand();

0 commit comments

Comments
 (0)