@@ -535,6 +535,7 @@ class VPBlockBase {
535
535
VPBlocksTy &getSuccessors () { return Successors; }
536
536
537
537
// / Returns an iterator range over this block's Successors list.
iterator_range<VPBlockBase **> successors () { return Successors; }
538
// / Returns an iterator range over this block's Predecessors list.
+ iterator_range<VPBlockBase **> predecessors () { return Predecessors; }
538
539
539
540
const VPBlocksTy &getPredecessors () const { return Predecessors; }
540
541
VPBlocksTy &getPredecessors () { return Predecessors; }
@@ -1400,7 +1401,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
1400
1401
// / result is also a single scalar.
1401
1402
bool isSingleScalar () const ;
1402
1403
1403
- // / Return the interleave count from the VPInstruction's last argument .
1404
+ // / Return the interleave count from VPInstruction's last operand .
1404
1405
unsigned getInterleaveCount () const ;
1405
1406
};
1406
1407
@@ -1690,7 +1691,7 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
1690
1691
isInBounds (), getDebugLoc ());
1691
1692
}
1692
1693
1693
- // / Return the current part for this vector pointer.
1694
+ // / Return the part associated with this vector pointer.
1694
1695
unsigned getPartForRecipe () const ;
1695
1696
1696
1697
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2034,7 +2035,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
2034
2035
// / Returns true, if the phi is part of an in-loop reduction.
2035
2036
bool isInLoop () const { return IsInLoop; }
2036
2037
2037
- // / Return the current part for this scalar step .
2038
+ // / Return the part associated with this reduction phi .
2038
2039
unsigned getPartForRecipe () const ;
2039
2040
};
2040
2041
@@ -2746,9 +2747,6 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
2746
2747
// / Generate the canonical scalar induction phi of the vector loop.
2747
2748
void execute (VPTransformState &State) override ;
2748
2749
2749
- // / Return the current part for this scalar step.
2750
- unsigned getPartForRecipe () const ;
2751
-
2752
2750
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2753
2751
// / Print the recipe.
2754
2752
void print (raw_ostream &O, const Twine &Indent,
@@ -2873,7 +2871,7 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
2873
2871
// / step = <VF*UF, VF*UF, ..., VF*UF>.
2874
2872
void execute (VPTransformState &State) override ;
2875
2873
2876
- // / Return the current part for this scalar step .
2874
+ // / Return the part associated with this widened IV .
2877
2875
unsigned getPartForRecipe () const ;
2878
2876
2879
2877
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2989,7 +2987,7 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
2989
2987
return true ;
2990
2988
}
2991
2989
2992
- // / Return the current part for this scalar step.
2990
+ // / Return the part associated with this scalar step.
2993
2991
unsigned getPartForRecipe () const ;
2994
2992
};
2995
2993
@@ -3093,6 +3091,7 @@ class VPBasicBlock : public VPBlockBase {
3093
3091
VPBasicBlock *splitAt (iterator SplitAt);
3094
3092
3095
3093
VPRegionBlock *getEnclosingLoopRegion ();
3094
+ const VPRegionBlock *getEnclosingLoopRegion () const ;
3096
3095
3097
3096
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3098
3097
// / Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
@@ -3315,6 +3314,7 @@ class VPlan {
3315
3314
// / Represents the loop-invariant VF * UF of the vector loop region.
3316
3315
VPValue VFxUF;
3317
3316
3317
+ // / Represents the loop-invariant VF of the vector loop region.
3318
3318
VPValue VF;
3319
3319
3320
3320
// / Holds a mapping between Values and their corresponding VPValue inside
@@ -3620,6 +3620,19 @@ class VPBlockUtils {
3620
3620
connectBlocks (BlockPtr, NewBlock);
3621
3621
}
3622
3622
3623
// / Insert disconnected block \p NewBlock before \p BlockPtr. \p NewBlock must
// / have no predecessors and no successors on entry (asserted). \p NewBlock is
// / given \p BlockPtr's parent, every predecessor of \p BlockPtr is disconnected
// / from \p BlockPtr and connected to \p NewBlock instead, and finally
// / \p NewBlock is connected to \p BlockPtr.
+ static void insertBlockBefore (VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3624
+ assert (NewBlock->getSuccessors ().empty () &&
3625
+ NewBlock->getPredecessors ().empty () &&
3626
+ " Can't insert new block with predecessors or successors." );
3627
+ NewBlock->setParent (BlockPtr->getParent ());
3628
// Iterate over a copy of the predecessors rather than the live range;
// presumably disconnectBlocks edits BlockPtr's predecessor list - confirm.
+ SmallVector<VPBlockBase *> Preds (BlockPtr->predecessors ());
3629
+ for (VPBlockBase *Pred : Preds) {
3630
+ disconnectBlocks (Pred, BlockPtr);
3631
+ connectBlocks (Pred, NewBlock);
3632
+ }
3633
// Link NewBlock to BlockPtr only after all former predecessors were moved.
+ connectBlocks (NewBlock, BlockPtr);
3634
+ }
3635
+
3623
3636
// / Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
3624
3637
// / BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
3625
3638
// / BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
@@ -3850,25 +3863,36 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) {
3850
3863
// / Return true if \p V is a header mask in \p Plan.
3851
3864
bool isHeaderMask (const VPValue *V, VPlan &Plan);
3852
3865
3853
- // / Checks if \p C is uniform across all VFs and UFs. It is considered as such
3854
- // / if it is either defined outside the vector region or its operand is known to
3855
- // / be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
3866
+ // / Checks if \p V is uniform across all VF lanes and UF parts. It is considered
3867
+ // / as such if it is either loop invariant (defined outside the vector region)
3868
+ // / or its operand is known to be uniform across all VFs and UFs (e.g.
3869
+ // / VPDerivedIV or VPCanonicalIVPHI).
3856
3870
inline bool isUniformAcrossVFsAndUFs (VPValue *V) {
3857
- if (V->isLiveIn ())
3871
+ // Loop invariants are uniform:
3872
+ if (V->isDefinedOutsideVectorRegions ())
3858
3873
return true ;
3859
- if (isa<VPCanonicalIVPHIRecipe, VPDerivedIVRecipe, VPExpandSCEVRecipe>(V))
3874
+
3875
+ auto *R = V->getDefiningRecipe ();
// NOTE(review): R is dereferenced below without a null check; presumably the
// early return above already covers values with no defining recipe - confirm.
3876
+ // Canonical IV chain is uniform:
3877
+ auto *CanonicalIV = R->getParent ()->getPlan ()->getCanonicalIV ();
3878
+ if (R == CanonicalIV || V == CanonicalIV->getBackedgeValue ())
3860
3879
return true ;
3861
- auto *R = cast<VPSingleDefRecipe>(V->getDefiningRecipe ());
3862
- if (R == R->getParent ()->getPlan ()->getCanonicalIV ()->getBackedgeValue ())
3880
+
3881
+ // DerivedIV is uniform:
3882
+ if (isa<VPDerivedIVRecipe>(R))
3863
3883
return true ;
3884
+
3885
+ // Loads and stores that are uniform across VF lanes are handled by
3886
+ // VPReplicateRecipe.IsUniform. They are also uniform across UF parts if all
3887
+ // their operands are invariant:
3864
3888
if (isa<VPReplicateRecipe>(V) && cast<VPReplicateRecipe>(V)->isUniform () &&
3865
3889
(isa<LoadInst, StoreInst>(V->getUnderlyingValue ())) &&
3866
- all_of (V-> getDefiningRecipe () ->operands (),
3890
+ all_of (R ->operands (),
3867
3891
[](VPValue *Op) { return Op->isDefinedOutsideVectorRegions (); }))
3868
3892
return true ;
3869
3893
3870
3894
// Fallback: only a scalar or widened cast qualifies, and (on the '+' side of
// this hunk) only when its operand 0 is a live-in, a VPDerivedIVRecipe, or a
// VPCanonicalIVPHIRecipe.
return isa<VPScalarCastRecipe, VPWidenCastRecipe>(R) &&
3871
- (R->isDefinedOutsideVectorRegions () || R-> getOperand (0 )->isLiveIn () ||
3895
+ (R->getOperand (0 )->isLiveIn () ||
3872
3896
isa<VPDerivedIVRecipe>(R->getOperand (0 )) ||
3873
3897
isa<VPCanonicalIVPHIRecipe>(R->getOperand (0 )));
3874
3898
}
0 commit comments