@@ -2568,9 +2568,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2568
2568
return scheduleFound && Schedule.getMaxStageCount () > 0 ;
2569
2569
}
2570
2570
2571
+ static Register findUniqueOperandDefinedInLoop (const MachineInstr &MI) {
2572
+ const MachineRegisterInfo &MRI = MI.getParent ()->getParent ()->getRegInfo ();
2573
+ Register Result;
2574
+ for (const MachineOperand &Use : MI.all_uses ()) {
2575
+ Register Reg = Use.getReg ();
2576
+ if (!Reg.isVirtual ())
2577
+ return Register ();
2578
+ if (MRI.getVRegDef (Reg)->getParent () != MI.getParent ())
2579
+ continue ;
2580
+ if (Result)
2581
+ return Register ();
2582
+ Result = Reg;
2583
+ }
2584
+ return Result;
2585
+ }
2586
+
2587
+ // / When Op is a value that is incremented recursively in a loop and there is a
2588
+ // / unique instruction that increments it, returns true and sets Value.
2589
+ static bool findLoopIncrementValue (const MachineOperand &Op, int &Value) {
2590
+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
2591
+ return false ;
2592
+
2593
+ Register OrgReg = Op.getReg ();
2594
+ Register CurReg = OrgReg;
2595
+ const MachineBasicBlock *LoopBB = Op.getParent ()->getParent ();
2596
+ const MachineRegisterInfo &MRI = LoopBB->getParent ()->getRegInfo ();
2597
+
2598
+ const TargetInstrInfo *TII =
2599
+ LoopBB->getParent ()->getSubtarget ().getInstrInfo ();
2600
+ const TargetRegisterInfo *TRI =
2601
+ LoopBB->getParent ()->getSubtarget ().getRegisterInfo ();
2602
+
2603
+ MachineInstr *Phi = nullptr ;
2604
+ MachineInstr *Increment = nullptr ;
2605
+
2606
+ // Traverse definitions until it reaches Op or an instruction that does not
2607
+ // satisfy the condition.
2608
+ // Acceptable example:
2609
+ // bb.0:
2610
+ // %0 = PHI %3, %bb.0, ...
2611
+ // %2 = ADD %0, Value
2612
+ // ... = LOAD %2(Op)
2613
+ // %3 = COPY %2
2614
+ while (true ) {
2615
+ if (!CurReg.isValid () || !CurReg.isVirtual ())
2616
+ return false ;
2617
+ MachineInstr *Def = MRI.getVRegDef (CurReg);
2618
+ if (Def->getParent () != LoopBB)
2619
+ return false ;
2620
+
2621
+ if (Def->isCopy ()) {
2622
+ // Ignore copy instructions unless they contain subregisters
2623
+ if (Def->getOperand (0 ).getSubReg () || Def->getOperand (1 ).getSubReg ())
2624
+ return false ;
2625
+ CurReg = Def->getOperand (1 ).getReg ();
2626
+ } else if (Def->isPHI ()) {
2627
+ // There must be just one Phi
2628
+ if (Phi)
2629
+ return false ;
2630
+ Phi = Def;
2631
+ CurReg = getLoopPhiReg (*Def, LoopBB);
2632
+ } else if (TII->getIncrementValue (*Def, Value)) {
2633
+ // Potentially a unique increment
2634
+ if (Increment)
2635
+ // Multiple increments exist
2636
+ return false ;
2637
+
2638
+ const MachineOperand *BaseOp;
2639
+ int64_t Offset;
2640
+ bool OffsetIsScalable;
2641
+ if (TII->getMemOperandWithOffset (*Def, BaseOp, Offset, OffsetIsScalable,
2642
+ TRI)) {
2643
+ // Pre/post increment instruction
2644
+ CurReg = BaseOp->getReg ();
2645
+ } else {
2646
+ // If only one of the operands is defined within the loop, it is assumed
2647
+ // to be an incremented value.
2648
+ CurReg = findUniqueOperandDefinedInLoop (*Def);
2649
+ if (!CurReg.isValid ())
2650
+ return false ;
2651
+ }
2652
+ Increment = Def;
2653
+ } else {
2654
+ return false ;
2655
+ }
2656
+ if (CurReg == OrgReg)
2657
+ break ;
2658
+ }
2659
+
2660
+ if (!Phi || !Increment)
2661
+ return false ;
2662
+
2663
+ return true ;
2664
+ }
2665
+
2571
2666
// / Return true if we can compute the amount the instruction changes
2572
2667
// / during each iteration. Set Delta to the amount of the change.
2573
- bool SwingSchedulerDAG::computeDelta (MachineInstr &MI, unsigned &Delta) const {
2668
+ bool SwingSchedulerDAG::computeDelta (const MachineInstr &MI, int &Delta) const {
2574
2669
const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2575
2670
const MachineOperand *BaseOp;
2576
2671
int64_t Offset;
@@ -2585,24 +2680,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
2585
2680
if (!BaseOp->isReg ())
2586
2681
return false ;
2587
2682
2588
- Register BaseReg = BaseOp->getReg ();
2589
-
2590
- MachineRegisterInfo &MRI = MF.getRegInfo ();
2591
- // Check if there is a Phi. If so, get the definition in the loop.
2592
- MachineInstr *BaseDef = MRI.getVRegDef (BaseReg);
2593
- if (BaseDef && BaseDef->isPHI ()) {
2594
- BaseReg = getLoopPhiReg (*BaseDef, MI.getParent ());
2595
- BaseDef = MRI.getVRegDef (BaseReg);
2596
- }
2597
- if (!BaseDef)
2598
- return false ;
2599
-
2600
- int D = 0 ;
2601
- if (!TII->getIncrementValue (*BaseDef, D) && D >= 0 )
2602
- return false ;
2603
-
2604
- Delta = D;
2605
- return true ;
2683
+ return findLoopIncrementValue (*BaseOp, Delta);
2606
2684
}
2607
2685
2608
2686
// / Check if we can change the instruction to use an offset value from the
@@ -2720,6 +2798,96 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2720
2798
return Def;
2721
2799
}
2722
2800
2801
+ // / Return false if there is no overlap between the region accessed by BaseMI in
2802
+ // / an iteration and the region accessed by OtherMI in subsequent iterations.
2803
+ bool SwingSchedulerDAG::mayOverlapInLaterIter (
2804
+ const MachineInstr *BaseMI, const MachineInstr *OtherMI) const {
2805
+ int DeltaB, DeltaO, Delta;
2806
+ if (!computeDelta (*BaseMI, DeltaB) || !computeDelta (*OtherMI, DeltaO) ||
2807
+ DeltaB != DeltaO)
2808
+ return true ;
2809
+ Delta = DeltaB;
2810
+
2811
+ const MachineOperand *BaseOpB, *BaseOpO;
2812
+ int64_t OffsetB, OffsetO;
2813
+ bool OffsetBIsScalable, OffsetOIsScalable;
2814
+ const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2815
+ if (!TII->getMemOperandWithOffset (*BaseMI, BaseOpB, OffsetB,
2816
+ OffsetBIsScalable, TRI) ||
2817
+ !TII->getMemOperandWithOffset (*OtherMI, BaseOpO, OffsetO,
2818
+ OffsetOIsScalable, TRI))
2819
+ return true ;
2820
+
2821
+ if (OffsetBIsScalable || OffsetOIsScalable)
2822
+ return true ;
2823
+
2824
+ if (!BaseOpB->isIdenticalTo (*BaseOpO)) {
2825
+ // Pass cases with different base operands but same initial values.
2826
+ // Typically for when pre/post increment is used.
2827
+
2828
+ if (!BaseOpB->isReg () || !BaseOpO->isReg ())
2829
+ return true ;
2830
+ Register RegB = BaseOpB->getReg (), RegO = BaseOpO->getReg ();
2831
+ if (!RegB.isVirtual () || !RegO.isVirtual ())
2832
+ return true ;
2833
+
2834
+ MachineInstr *DefB = MRI.getVRegDef (BaseOpB->getReg ());
2835
+ MachineInstr *DefO = MRI.getVRegDef (BaseOpO->getReg ());
2836
+ if (!DefB || !DefO || !DefB->isPHI () || !DefO->isPHI ())
2837
+ return true ;
2838
+
2839
+ unsigned InitValB = 0 ;
2840
+ unsigned LoopValB = 0 ;
2841
+ unsigned InitValO = 0 ;
2842
+ unsigned LoopValO = 0 ;
2843
+ getPhiRegs (*DefB, BB, InitValB, LoopValB);
2844
+ getPhiRegs (*DefO, BB, InitValO, LoopValO);
2845
+ MachineInstr *InitDefB = MRI.getVRegDef (InitValB);
2846
+ MachineInstr *InitDefO = MRI.getVRegDef (InitValO);
2847
+
2848
+ if (!InitDefB->isIdenticalTo (*InitDefO))
2849
+ return true ;
2850
+ }
2851
+
2852
+ LocationSize AccessSizeB = (*BaseMI->memoperands_begin ())->getSize ();
2853
+ LocationSize AccessSizeO = (*OtherMI->memoperands_begin ())->getSize ();
2854
+
2855
+ // This is the main test, which checks the offset values and the loop
2856
+ // increment value to determine if the accesses may be loop carried.
2857
+ if (!AccessSizeB.hasValue () || !AccessSizeO.hasValue ())
2858
+ return true ;
2859
+
2860
+ LLVM_DEBUG ({
2861
+ dbgs () << " Overlap check:\n " ;
2862
+ dbgs () << " BaseMI: " ;
2863
+ BaseMI->dump ();
2864
+ dbgs () << " Base + " << OffsetB << " + I * " << Delta
2865
+ << " , Len: " << AccessSizeB.getValue () << " \n " ;
2866
+ dbgs () << " OtherMI: " ;
2867
+ OtherMI->dump ();
2868
+ dbgs () << " Base + " << OffsetO << " + I * " << Delta
2869
+ << " , Len: " << AccessSizeO.getValue () << " \n " ;
2870
+ });
2871
+
2872
+ if (Delta < 0 ) {
2873
+ int64_t BaseMinAddr = OffsetB;
2874
+ int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue () - 1 ;
2875
+ if (BaseMinAddr > OhterNextIterMaxAddr) {
2876
+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2877
+ return false ;
2878
+ }
2879
+ } else {
2880
+ int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue () - 1 ;
2881
+ int64_t OtherNextIterMinAddr = OffsetO + Delta;
2882
+ if (BaseMaxAddr < OtherNextIterMinAddr) {
2883
+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2884
+ return false ;
2885
+ }
2886
+ }
2887
+ LLVM_DEBUG (dbgs () << " Result: Overlap\n " );
2888
+ return true ;
2889
+ }
2890
+
2723
2891
// / Return true for an order or output dependence that is loop carried
2724
2892
// / potentially. A dependence is loop carried if the destination defines a value
2725
2893
// / that may be used or defined by the source in a subsequent iteration.
@@ -2753,61 +2921,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
2753
2921
// The conservative assumption is that a dependence between memory operations
2754
2922
// may be loop carried. The following code checks when it can be proved that
2755
2923
// there is no loop carried dependence.
2756
- unsigned DeltaS, DeltaD;
2757
- if (!computeDelta (*SI, DeltaS) || !computeDelta (*DI, DeltaD))
2758
- return true ;
2759
-
2760
- const MachineOperand *BaseOpS, *BaseOpD;
2761
- int64_t OffsetS, OffsetD;
2762
- bool OffsetSIsScalable, OffsetDIsScalable;
2763
- const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2764
- if (!TII->getMemOperandWithOffset (*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2765
- TRI) ||
2766
- !TII->getMemOperandWithOffset (*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2767
- TRI))
2768
- return true ;
2769
-
2770
- assert (!OffsetSIsScalable && !OffsetDIsScalable &&
2771
- " Expected offsets to be byte offsets" );
2772
-
2773
- MachineInstr *DefS = MRI.getVRegDef (BaseOpS->getReg ());
2774
- MachineInstr *DefD = MRI.getVRegDef (BaseOpD->getReg ());
2775
- if (!DefS || !DefD || !DefS->isPHI () || !DefD->isPHI ())
2776
- return true ;
2777
-
2778
- unsigned InitValS = 0 ;
2779
- unsigned LoopValS = 0 ;
2780
- unsigned InitValD = 0 ;
2781
- unsigned LoopValD = 0 ;
2782
- getPhiRegs (*DefS, BB, InitValS, LoopValS);
2783
- getPhiRegs (*DefD, BB, InitValD, LoopValD);
2784
- MachineInstr *InitDefS = MRI.getVRegDef (InitValS);
2785
- MachineInstr *InitDefD = MRI.getVRegDef (InitValD);
2786
-
2787
- if (!InitDefS->isIdenticalTo (*InitDefD))
2788
- return true ;
2789
-
2790
- // Check that the base register is incremented by a constant value for each
2791
- // iteration.
2792
- MachineInstr *LoopDefS = MRI.getVRegDef (LoopValS);
2793
- int D = 0 ;
2794
- if (!LoopDefS || !TII->getIncrementValue (*LoopDefS, D))
2795
- return true ;
2796
-
2797
- LocationSize AccessSizeS = (*SI->memoperands_begin ())->getSize ();
2798
- LocationSize AccessSizeD = (*DI->memoperands_begin ())->getSize ();
2799
-
2800
- // This is the main test, which checks the offset values and the loop
2801
- // increment value to determine if the accesses may be loop carried.
2802
- if (!AccessSizeS.hasValue () || !AccessSizeD.hasValue ())
2803
- return true ;
2804
-
2805
- if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue () ||
2806
- DeltaD < AccessSizeD.getValue ())
2807
- return true ;
2808
-
2809
- return (OffsetS + (int64_t )AccessSizeS.getValue () <
2810
- OffsetD + (int64_t )AccessSizeD.getValue ());
2924
+ return mayOverlapInLaterIter (DI, SI);
2811
2925
}
2812
2926
2813
2927
void SwingSchedulerDAG::postProcessDAG () {
0 commit comments