@@ -421,7 +421,14 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
421
421
422
422
// Hook up the new basic block to its predecessors.
423
423
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors ()) {
424
- VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
424
+ auto *VPRB = dyn_cast<VPRegionBlock>(PredVPBlock);
425
+
426
+ // If this block is the early exit of a predecessor region, it is reached
427
+ // from the region's early exiting block, not its regular exiting block.
428
+ VPBasicBlock *PredVPBB = VPRB && VPRB->getEarlyExit () == this
429
+ ? cast<VPBasicBlock>(VPRB->getEarlyExiting ())
430
+ : PredVPBlock->getExitingBasicBlock ();
431
+
425
432
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
426
433
BasicBlock *PredBB = CFG.VPBB2IRBB [PredVPBB];
427
434
@@ -443,6 +450,11 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
443
450
// Set each forward successor here when it is created, excluding
444
451
// backedges. A backward successor is set when the branch is created.
445
452
unsigned idx = PredVPSuccessors.front () == this ? 0 : 1 ;
453
+ VPRegionBlock *PredParentRegion =
454
+ dyn_cast_or_null<VPRegionBlock>(PredVPBB->getParent ());
455
+ if (PredParentRegion && PredParentRegion->getEarlyExiting () == PredVPBB) {
456
+ idx = 1 - idx;
457
+ }
446
458
assert (!TermBr->getSuccessor (idx) &&
447
459
" Trying to reset an existing successor block." );
448
460
TermBr->setSuccessor (idx, NewBB);
@@ -499,6 +511,7 @@ void VPBasicBlock::execute(VPTransformState *State) {
499
511
!((SingleHPred = getSingleHierarchicalPredecessor ()) &&
500
512
SingleHPred->getExitingBasicBlock () == PrevVPBB &&
501
513
PrevVPBB->getSingleHierarchicalSuccessor () &&
514
+ PrevVPBB != getEnclosingLoopRegion ()->getEarlyExiting () &&
502
515
(SingleHPred->getParent () == getEnclosingLoopRegion () &&
503
516
!IsLoopRegion (SingleHPred))) && /* B */
504
517
!(Replica && getPredecessors ().empty ())) { /* C */
@@ -517,7 +530,8 @@ void VPBasicBlock::execute(VPTransformState *State) {
517
530
UnreachableInst *Terminator = State->Builder .CreateUnreachable ();
518
531
// Register NewBB in its loop. In innermost loops its the same for all
519
532
// BB's.
520
- if (State->CurrentVectorLoop )
533
+ if (State->CurrentVectorLoop &&
534
+ this != getEnclosingLoopRegion ()->getEarlyExit ())
521
535
State->CurrentVectorLoop ->addBasicBlockToLoop (NewBB, *State->LI );
522
536
State->Builder .SetInsertPoint (Terminator);
523
537
State->CFG .PrevBB = NewBB;
@@ -635,7 +649,11 @@ const VPRecipeBase *VPBasicBlock::getTerminator() const {
635
649
}
636
650
637
651
bool VPBasicBlock::isExiting () const {
638
- return getParent () && getParent ()->getExitingBasicBlock () == this ;
652
+ const VPRegionBlock *VPRB = getParent ();
653
+ if (!VPRB)
654
+ return false ;
655
+ return VPRB->getExitingBasicBlock () == this ||
656
+ VPRB->getEarlyExiting () == this ;
639
657
}
640
658
641
659
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -876,13 +894,15 @@ static VPIRBasicBlock *createVPIRBasicBlockFor(BasicBlock *BB) {
876
894
VPlanPtr VPlan::createInitialVPlan (Type *InductionTy,
877
895
PredicatedScalarEvolution &PSE,
878
896
bool RequiresScalarEpilogueCheck,
879
- bool TailFolded, Loop *TheLoop) {
897
+ bool TailFolded, Loop *TheLoop,
898
+ BasicBlock *EarlyExitingBB,
899
+ BasicBlock *EarlyExitBB) {
880
900
VPIRBasicBlock *Entry = createVPIRBasicBlockFor (TheLoop->getLoopPreheader ());
881
901
VPBasicBlock *VecPreheader = new VPBasicBlock (" vector.ph" );
882
902
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader);
883
903
884
904
// Create SCEV and VPValue for the trip count.
885
- const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount ();
905
+ const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount ();
886
906
assert (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && " Invalid loop count" );
887
907
ScalarEvolution &SE = *PSE.getSE ();
888
908
const SCEV *TripCount =
@@ -902,6 +922,13 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
902
922
VPBasicBlock *MiddleVPBB = new VPBasicBlock (" middle.block" );
903
923
VPBlockUtils::insertBlockAfter (MiddleVPBB, TopRegion);
904
924
925
+ if (EarlyExitingBB) {
926
+ VPBasicBlock *EarlyExitVPBB = new VPBasicBlock (" vector.early.exit" );
927
+ TopRegion->setEarlyExit (EarlyExitVPBB);
928
+ VPBlockUtils::connectBlocks (TopRegion, EarlyExitVPBB);
929
+ TopRegion->setOrigEarlyExit (EarlyExitBB);
930
+ }
931
+
905
932
VPBasicBlock *ScalarPH = new VPBasicBlock (" scalar.ph" );
906
933
if (!RequiresScalarEpilogueCheck) {
907
934
VPBlockUtils::connectBlocks (MiddleVPBB, ScalarPH);
@@ -916,7 +943,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
916
943
// 2) If we require a scalar epilogue, there is no conditional branch as
917
944
// we unconditionally branch to the scalar preheader. Do nothing.
918
945
// 3) Otherwise, construct a runtime check.
919
- BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock ();
946
+ BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock ();
920
947
auto *VPExitBlock = createVPIRBasicBlockFor (IRExitBlock);
921
948
// The connection order corresponds to the operands of the conditional branch.
922
949
VPBlockUtils::insertBlockAfter (VPExitBlock, MiddleVPBB);
@@ -992,7 +1019,8 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
992
1019
// / VPBB are moved to the end of the newly created VPIRBasicBlock. VPBB must
993
1020
// / have a single predecessor, which is rewired to the new VPIRBasicBlock. All
994
1021
// / successors of VPBB, if any, are rewired to the new VPIRBasicBlock.
995
- static void replaceVPBBWithIRVPBB (VPBasicBlock *VPBB, BasicBlock *IRBB) {
1022
+ static VPIRBasicBlock *replaceVPBBWithIRVPBB (VPBasicBlock *VPBB,
1023
+ BasicBlock *IRBB) {
996
1024
VPIRBasicBlock *IRVPBB = createVPIRBasicBlockFor (IRBB);
997
1025
for (auto &R : make_early_inc_range (*VPBB)) {
998
1026
assert (!R.isPhi () && " Tried to move phi recipe to end of block" );
@@ -1006,6 +1034,7 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
1006
1034
VPBlockUtils::disconnectBlocks (VPBB, Succ);
1007
1035
}
1008
1036
delete VPBB;
1037
+ return IRVPBB;
1009
1038
}
1010
1039
1011
1040
// / Generate the code inside the preheader and body of the vectorized loop.
@@ -1029,7 +1058,7 @@ void VPlan::execute(VPTransformState *State) {
1029
1058
// VPlan execution rather than earlier during VPlan construction.
1030
1059
BasicBlock *MiddleBB = State->CFG .ExitBB ;
1031
1060
VPBasicBlock *MiddleVPBB =
1032
- cast<VPBasicBlock>(getVectorLoopRegion ()->getSingleSuccessor () );
1061
+ cast<VPBasicBlock>(getVectorLoopRegion ()->getSuccessors ()[ 0 ] );
1033
1062
// Find the VPBB for the scalar preheader, relying on the current structure
1034
1063
// when creating the middle block and its successrs: if there's a single
1035
1064
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1043,7 +1072,14 @@ void VPlan::execute(VPTransformState *State) {
1043
1072
assert (!isa<VPIRBasicBlock>(ScalarPhVPBB) &&
1044
1073
" scalar preheader cannot be wrapped already" );
1045
1074
replaceVPBBWithIRVPBB (ScalarPhVPBB, ScalarPh);
1046
- replaceVPBBWithIRVPBB (MiddleVPBB, MiddleBB);
1075
+ MiddleVPBB = replaceVPBBWithIRVPBB (MiddleVPBB, MiddleBB);
1076
+
1077
+ // Ensure the middle block is still the first successor.
1078
+ for (auto *Succ : getVectorLoopRegion ()->getSuccessors ())
1079
+ if (Succ == MiddleVPBB) {
1080
+ getVectorLoopRegion ()->moveSuccessorToFront (MiddleVPBB);
1081
+ break ;
1082
+ }
1047
1083
1048
1084
// Disconnect the middle block from its single successor (the scalar loop
1049
1085
// header) in both the CFG and DT. The branch will be recreated during VPlan
@@ -1104,6 +1140,20 @@ void VPlan::execute(VPTransformState *State) {
1104
1140
cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1105
1141
}
1106
1142
1143
+ // Patch up the vector early exit block to branch to the original scalar
1144
+ // loop's early exit block.
1145
+ if (getVectorLoopRegion ()->getEarlyExit ()) {
1146
+ VPBasicBlock *EarlyExitVPBB =
1147
+ cast<VPBasicBlock>(getVectorLoopRegion ()->getEarlyExit ());
1148
+ BasicBlock *VectorEarlyExitBB = State->CFG .VPBB2IRBB [EarlyExitVPBB];
1149
+ BasicBlock *OrigEarlyExitBB = getVectorLoopRegion ()->getOrigEarlyExit ();
1150
+ BranchInst *BI = BranchInst::Create (OrigEarlyExitBB);
1151
+ BI->insertBefore (VectorEarlyExitBB->getTerminator ());
1152
+ VectorEarlyExitBB->getTerminator ()->eraseFromParent ();
1153
+ State->CFG .DTU .applyUpdates (
1154
+ {{DominatorTree::Insert, VectorEarlyExitBB, OrigEarlyExitBB}});
1155
+ }
1156
+
1107
1157
State->CFG .DTU .flush ();
1108
1158
assert (State->CFG .DTU .getDomTree ().verify (
1109
1159
DominatorTree::VerificationLevel::Fast) &&
@@ -1212,9 +1262,10 @@ LLVM_DUMP_METHOD
1212
1262
void VPlan::dump () const { print (dbgs ()); }
1213
1263
#endif
1214
1264
1215
- void VPlan::addLiveOut (PHINode *PN, VPValue *V) {
1216
- assert (LiveOuts.count (PN) == 0 && " an exit value for PN already exists" );
1217
- LiveOuts.insert ({PN, new VPLiveOut (PN, V)});
1265
+ void VPlan::addLiveOut (PHINode *PN, VPValue *V, VPBasicBlock *IncomingBlock) {
1266
+ auto Key = std::pair<PHINode *, VPBasicBlock *>(PN, IncomingBlock);
1267
+ assert (LiveOuts.count (Key) == 0 && " an exit value for PN already exists" );
1268
+ LiveOuts.insert ({Key, new VPLiveOut (PN, V)});
1218
1269
}
1219
1270
1220
1271
static void remapOperands (VPBlockBase *Entry, VPBlockBase *NewEntry,
@@ -1285,8 +1336,9 @@ VPlan *VPlan::duplicate() {
1285
1336
remapOperands (Entry, NewEntry, Old2NewVPValues);
1286
1337
1287
1338
// Clone live-outs.
1288
- for (const auto &[_, LO] : LiveOuts)
1289
- NewPlan->addLiveOut (LO->getPhi (), Old2NewVPValues[LO->getOperand (0 )]);
1339
+ for (const auto &[Key, LO] : LiveOuts)
1340
+ NewPlan->addLiveOut (LO->getPhi (), Old2NewVPValues[LO->getOperand (0 )],
1341
+ Key.second );
1290
1342
1291
1343
// Initialize remaining fields of cloned VPlan.
1292
1344
NewPlan->VFs = VFs;
0 commit comments