42
42
43
43
#include " llvm/ADT/STLExtras.h"
44
44
#include " llvm/ADT/SetVector.h"
45
+ #include " llvm/Analysis/AliasAnalysis.h"
45
46
#include " llvm/CodeGen/DFAPacketizer.h"
46
47
#include " llvm/CodeGen/MachineDominators.h"
47
48
#include " llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -190,6 +191,33 @@ class SwingSchedulerDDGEdge {
190
191
bool ignoreDependence (bool IgnoreAnti) const ;
191
192
};
192
193
194
+ struct LoopCarriedEdges {
195
+ using OutputDep = SmallDenseMap<Register, SmallSetVector<SUnit *, 4 >>;
196
+ using OrderDep = SmallSetVector<SUnit *, 8 >;
197
+ using OutputDepsType = DenseMap<SUnit *, OutputDep>;
198
+ using OrderDepsType = DenseMap<SUnit *, OrderDep>;
199
+
200
+ OutputDepsType OutputDeps;
201
+ OrderDepsType OrderDeps;
202
+
203
+ const OutputDep *getOutputDepOrNull (SUnit *Key) const {
204
+ auto Ite = OutputDeps.find (Key);
205
+ if (Ite == OutputDeps.end ())
206
+ return nullptr ;
207
+ return &Ite->second ;
208
+ }
209
+
210
+ const OrderDep *getOrderDepOrNull (SUnit *Key) const {
211
+ auto Ite = OrderDeps.find (Key);
212
+ if (Ite == OrderDeps.end ())
213
+ return nullptr ;
214
+ return &Ite->second ;
215
+ }
216
+
217
+ void dump (SUnit *SU, const TargetRegisterInfo *TRI,
218
+ const MachineRegisterInfo *MRI) const ;
219
+ };
220
+
193
221
/// Represents dependencies between instructions. This class is a wrapper of
194
222
/// `SUnits` and its dependencies to manipulate back-edges in a natural way.
195
223
/// Currently it only supports back-edges via PHI, which are expressed as
@@ -217,8 +245,12 @@ class SwingSchedulerDDG {
217
245
SwingSchedulerDDGEdges &getEdges (const SUnit *SU);
218
246
const SwingSchedulerDDGEdges &getEdges (const SUnit *SU) const ;
219
247
248
+ void addLoopCarriedEdges (std::vector<SUnit> &SUnits,
249
+ const LoopCarriedEdges &LCE);
250
+
220
251
public:
221
- SwingSchedulerDDG (std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU);
252
+ SwingSchedulerDDG (std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
253
+ const LoopCarriedEdges &LCE);
222
254
223
255
const EdgesType &getInEdges (const SUnit *SU) const ;
224
256
@@ -285,22 +317,14 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
285
317
BitVector Blocked;
286
318
SmallVector<SmallPtrSet<SUnit *, 4 >, 10 > B;
287
319
SmallVector<SmallVector<int , 4 >, 16 > AdjK;
288
- // Node to Index from ScheduleDAGTopologicalSort
289
- std::vector<int > *Node2Idx;
320
+ SmallVector<BitVector, 16 > LoopCarried;
290
321
unsigned NumPaths = 0u ;
291
- static unsigned MaxPaths;
292
322
293
323
public:
294
- Circuits (std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
295
- : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
296
- Node2Idx = new std::vector<int >(SUs.size ());
297
- unsigned Idx = 0 ;
298
- for (const auto &NodeNum : Topo)
299
- Node2Idx->at (NodeNum) = Idx++;
300
- }
324
+ Circuits (std::vector<SUnit> &SUs)
325
+ : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {}
301
326
Circuits &operator =(const Circuits &other) = delete ;
302
327
Circuits (const Circuits &other) = delete ;
303
- ~Circuits () { delete Node2Idx; }
304
328
305
329
/// Reset the data structures used in the circuit algorithm.
306
330
void reset () {
@@ -310,9 +334,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
310
334
NumPaths = 0 ;
311
335
}
312
336
313
- void createAdjacencyStructure (SwingSchedulerDAG *DAG );
337
+ void createAdjacencyStructure (const SwingSchedulerDDG *DDG );
314
338
bool circuit (int V, int S, NodeSetType &NodeSets,
315
- const SwingSchedulerDAG *DAG , bool HasBackedge = false );
339
+ const SwingSchedulerDDG *DDG , bool HasLoopCarriedEdge = false );
316
340
void unblock (int U);
317
341
};
318
342
@@ -366,7 +390,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
366
390
return ScheduleInfo[Node->NodeNum ].ZeroLatencyHeight ;
367
391
}
368
392
369
- bool isLoopCarriedDep (const SwingSchedulerDDGEdge &Edge) const ;
393
+ bool hasLoopCarriedMemDep (const MachineInstr *Src, const MachineInstr *Dst,
394
+ BatchAAResults *BAA) const ;
370
395
371
396
void applyInstrChange (MachineInstr *MI, SMSchedule &Schedule);
372
397
@@ -390,11 +415,11 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
390
415
391
416
const SwingSchedulerDDG *getDDG () const { return DDG.get (); }
392
417
393
- bool mayOverlapInLaterIter (const MachineInstr *BaseMI,
394
- const MachineInstr *OtherMI) const ;
418
+ AliasResult::Kind mayOverlapInLaterIter (const MachineInstr *BaseMI,
419
+ const MachineInstr *OtherMI) const ;
395
420
396
421
private:
397
- void addLoopCarriedDependences (AAResults *AA);
422
+ LoopCarriedEdges addLoopCarriedDependences (AAResults *AA);
398
423
void updatePhiDependences ();
399
424
void changeDependences ();
400
425
unsigned calculateResMII ();
@@ -440,7 +465,7 @@ class NodeSet {
440
465
using iterator = SetVector<SUnit *>::const_iterator;
441
466
442
467
NodeSet () = default ;
443
- NodeSet (iterator S, iterator E, const SwingSchedulerDAG *DAG )
468
+ NodeSet (iterator S, iterator E, const SwingSchedulerDDG *DDG )
444
469
: Nodes(S, E), HasRecurrence(true ) {
445
470
// Calculate the latency of this node set.
446
471
// Example to demonstrate the calculation:
@@ -456,7 +481,6 @@ class NodeSet {
456
481
//
457
482
// Hold a map from each SUnit in the circle to the maximum distance from the
458
483
// source node by only considering the nodes.
459
- const SwingSchedulerDDG *DDG = DAG->getDDG ();
460
484
DenseMap<SUnit *, unsigned > SUnitToDistance;
461
485
for (auto *Node : Nodes)
462
486
SUnitToDistance[Node] = 0 ;
@@ -474,22 +498,6 @@ class NodeSet {
474
498
DV = DU + Succ.getLatency ();
475
499
}
476
500
}
477
- // Handle a back-edge in loop carried dependencies
478
- SUnit *FirstNode = Nodes[0 ];
479
- SUnit *LastNode = Nodes[Nodes.size () - 1 ];
480
-
481
- for (auto &PI : DDG->getInEdges (LastNode)) {
482
- // If we have an order dep that is potentially loop carried then a
483
- // back-edge exists between the last node and the first node that isn't
484
- // modeled in the DAG. Handle it manually by adding 1 to the distance of
485
- // the last node.
486
- if (PI.getSrc () != FirstNode || !PI.isOrderDep () ||
487
- !DAG->isLoopCarriedDep (PI))
488
- continue ;
489
- SUnitToDistance[FirstNode] =
490
- std::max (SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1 );
491
- }
492
-
493
501
// The latency is the distance from the source node to itself.
494
502
Latency = SUnitToDistance[Nodes.front ()];
495
503
}
0 commit comments