Skip to content

Commit 4abc317

Browse files
committed
!fixup address latest comments, thanks!
1 parent f3e47f5 commit 4abc317

File tree

7 files changed

+236
-180
lines changed

7 files changed

+236
-180
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,8 @@ class VPBuilder {
166166
std::initializer_list<VPValue *> Operands,
167167
DebugLoc DL = {}, const Twine &Name = "",
168168
FastMathFlags FMFs = {}) {
169-
auto *Op = new VPInstruction(Opcode, Operands, FMFs, DL, Name);
170-
return tryInsertInstruction(Op);
169+
return tryInsertInstruction(
170+
new VPInstruction(Opcode, Operands, FMFs, DL, Name));
171171
}
172172

173173
VPValue *createNot(VPValue *Operand, DebugLoc DL = {},

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7392,8 +7392,8 @@ LoopVectorizationPlanner::executePlan(
73927392
"expanded SCEVs to reuse can only be used during epilogue vectorization");
73937393
(void)IsEpilogueVectorization;
73947394

7395-
VPlanTransforms::interleave(BestVPlan, BestUF,
7396-
OrigLoop->getHeader()->getModule()->getContext());
7395+
VPlanTransforms::interleaveByUF(
7396+
BestVPlan, BestUF, OrigLoop->getHeader()->getModule()->getContext());
73977397
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
73987398

73997399
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -572,8 +572,7 @@ VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
572572
return SplitBlock;
573573
}
574574

575-
VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
576-
VPRegionBlock *P = getParent();
575+
template <typename T> static T *getEnclosingLoopRegionImpl(T *P) {
577576
if (P && P->isReplicator()) {
578577
P = P->getParent();
579578
assert(!cast<VPRegionBlock>(P)->isReplicator() &&
@@ -582,6 +581,14 @@ VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
582581
return P;
583582
}
584583

584+
const VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() const {
585+
return getEnclosingLoopRegionImpl(getParent());
586+
}
587+
588+
VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() {
589+
return getEnclosingLoopRegionImpl(getParent());
590+
}
591+
585592
static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
586593
if (VPBB->empty()) {
587594
assert(
@@ -933,7 +940,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
933940
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));
934941
if (VF.getNumUsers() > 0) {
935942
VF.setUnderlyingValue(
936-
createStepForVF(Builder, TripCountV->getType(), State.VF, 1));
943+
getRuntimeVF(Builder, TripCountV->getType(), State.VF));
937944
}
938945

939946
// When vectorizing the epilogue loop, the canonical induction start value
@@ -1053,10 +1060,12 @@ void VPlan::execute(VPTransformState *State) {
10531060
// Move the last step to the end of the latch block. This ensures
10541061
// consistent placement of all induction updates.
10551062
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1063+
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
1064+
1065+
// When the VPlan has been unrolled, chain together the steps of the
1066+
// unrolled parts.
10561067
if (isa<VPWidenIntOrFpInductionRecipe>(&R) && R.getNumOperands() == 4)
10571068
Inc->setOperand(0, State->get(R.getOperand(3), 0));
1058-
1059-
Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
10601069
continue;
10611070
}
10621071

@@ -1427,7 +1436,8 @@ void VPlanIngredient::print(raw_ostream &O) const {
14271436
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
14281437

14291438
bool VPValue::isDefinedOutsideVectorRegions() const {
1430-
return !hasDefiningRecipe() || !getDefiningRecipe()->getParent()->getParent();
1439+
return !hasDefiningRecipe() ||
1440+
!getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
14311441
}
14321442

14331443
void VPValue::replaceAllUsesWith(VPValue *New) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,7 @@ class VPBlockBase {
535535
VPBlocksTy &getSuccessors() { return Successors; }
536536

537537
iterator_range<VPBlockBase **> successors() { return Successors; }
538+
iterator_range<VPBlockBase **> predecessors() { return Predecessors; }
538539

539540
const VPBlocksTy &getPredecessors() const { return Predecessors; }
540541
VPBlocksTy &getPredecessors() { return Predecessors; }
@@ -1400,7 +1401,7 @@ class VPInstruction : public VPRecipeWithIRFlags {
14001401
/// result is also a single scalar.
14011402
bool isSingleScalar() const;
14021403

1403-
/// Return the interleave count from the VPInstruction's last argument.
1404+
/// Return the interleave count from VPInstruction's last operand.
14041405
unsigned getInterleaveCount() const;
14051406
};
14061407

@@ -1690,7 +1691,7 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags {
16901691
isInBounds(), getDebugLoc());
16911692
}
16921693

1693-
/// Return the current part for this vector pointer.
1694+
/// Return the part associated with this vector pointer.
16941695
unsigned getPartForRecipe() const;
16951696

16961697
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2034,7 +2035,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe {
20342035
/// Returns true, if the phi is part of an in-loop reduction.
20352036
bool isInLoop() const { return IsInLoop; }
20362037

2037-
/// Return the current part for this scalar step.
2038+
/// Return the part associated with this reduction phi.
20382039
unsigned getPartForRecipe() const;
20392040
};
20402041

@@ -2746,9 +2747,6 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
27462747
/// Generate the canonical scalar induction phi of the vector loop.
27472748
void execute(VPTransformState &State) override;
27482749

2749-
/// Return the current part for this scalar step.
2750-
unsigned getPartForRecipe() const;
2751-
27522750
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
27532751
/// Print the recipe.
27542752
void print(raw_ostream &O, const Twine &Indent,
@@ -2873,7 +2871,7 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
28732871
/// step = <VF*UF, VF*UF, ..., VF*UF>.
28742872
void execute(VPTransformState &State) override;
28752873

2876-
/// Return the current part for this scalar step.
2874+
/// Return the part associated with this widened IV.
28772875
unsigned getPartForRecipe() const;
28782876

28792877
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2989,7 +2987,7 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
29892987
return true;
29902988
}
29912989

2992-
/// Return the current part for this scalar step.
2990+
/// Return the part associated with this scalar step.
29932991
unsigned getPartForRecipe() const;
29942992
};
29952993

@@ -3093,6 +3091,7 @@ class VPBasicBlock : public VPBlockBase {
30933091
VPBasicBlock *splitAt(iterator SplitAt);
30943092

30953093
VPRegionBlock *getEnclosingLoopRegion();
3094+
const VPRegionBlock *getEnclosingLoopRegion() const;
30963095

30973096
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
30983097
/// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
@@ -3315,6 +3314,7 @@ class VPlan {
33153314
/// Represents the loop-invariant VF * UF of the vector loop region.
33163315
VPValue VFxUF;
33173316

3317+
/// Represents the loop-invariant VF of the vector loop region.
33183318
VPValue VF;
33193319

33203320
/// Holds a mapping between Values and their corresponding VPValue inside
@@ -3620,6 +3620,19 @@ class VPBlockUtils {
36203620
connectBlocks(BlockPtr, NewBlock);
36213621
}
36223622

3623+
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3624+
assert(NewBlock->getSuccessors().empty() &&
3625+
NewBlock->getPredecessors().empty() &&
3626+
"Can't insert new block with predecessors or successors.");
3627+
NewBlock->setParent(BlockPtr->getParent());
3628+
SmallVector<VPBlockBase *> Preds(BlockPtr->predecessors());
3629+
for (VPBlockBase *Pred : Preds) {
3630+
disconnectBlocks(Pred, BlockPtr);
3631+
connectBlocks(Pred, NewBlock);
3632+
}
3633+
connectBlocks(NewBlock, BlockPtr);
3634+
}
3635+
36233636
/// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
36243637
/// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
36253638
/// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
@@ -3850,25 +3863,36 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) {
38503863
/// Return true if \p V is a header mask in \p Plan.
38513864
bool isHeaderMask(const VPValue *V, VPlan &Plan);
38523865

3853-
/// Checks if \p C is uniform across all VFs and UFs. It is considered as such
3854-
/// if it is either defined outside the vector region or its operand is known to
3855-
/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI).
3866+
/// Checks if \p V is uniform across all VF lanes and UF parts. It is considered
3867+
/// as such if it is either loop invariant (defined outside the vector region)
3868+
/// or its operand is known to be uniform across all VFs and UFs (e.g.
3869+
/// VPDerivedIV or VPCanonicalIVPHI).
38563870
inline bool isUniformAcrossVFsAndUFs(VPValue *V) {
3857-
if (V->isLiveIn())
3871+
// Loop invariants are uniform:
3872+
if (V->isDefinedOutsideVectorRegions())
38583873
return true;
3859-
if (isa<VPCanonicalIVPHIRecipe, VPDerivedIVRecipe, VPExpandSCEVRecipe>(V))
3874+
3875+
auto *R = V->getDefiningRecipe();
3876+
// Canonical IV chain is uniform:
3877+
auto *CanonicalIV = R->getParent()->getPlan()->getCanonicalIV();
3878+
if (R == CanonicalIV || V == CanonicalIV->getBackedgeValue())
38603879
return true;
3861-
auto *R = cast<VPSingleDefRecipe>(V->getDefiningRecipe());
3862-
if (R == R->getParent()->getPlan()->getCanonicalIV()->getBackedgeValue())
3880+
3881+
// DerivedIV is uniform:
3882+
if (isa<VPDerivedIVRecipe>(R))
38633883
return true;
3884+
3885+
// Loads and stores that are uniform across VF lanes are handled by
3886+
// VPReplicateRecipe.IsUniform. They are also uniform across UF parts if all
3887+
// their operands are invariant:
38643888
if (isa<VPReplicateRecipe>(V) && cast<VPReplicateRecipe>(V)->isUniform() &&
38653889
(isa<LoadInst, StoreInst>(V->getUnderlyingValue())) &&
3866-
all_of(V->getDefiningRecipe()->operands(),
3890+
all_of(R->operands(),
38673891
[](VPValue *Op) { return Op->isDefinedOutsideVectorRegions(); }))
38683892
return true;
38693893

38703894
return isa<VPScalarCastRecipe, VPWidenCastRecipe>(R) &&
3871-
(R->isDefinedOutsideVectorRegions() || R->getOperand(0)->isLiveIn() ||
3895+
(R->getOperand(0)->isLiveIn() ||
38723896
isa<VPDerivedIVRecipe>(R->getOperand(0)) ||
38733897
isa<VPCanonicalIVPHIRecipe>(R->getOperand(0)));
38743898
}

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -544,8 +544,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
544544
return CondBr;
545545
}
546546
case VPInstruction::ComputeReductionResult: {
547-
unsigned NumParts = getNumOperands() - 1;
548-
549547
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
550548
// and will be removed by breaking up the recipe further.
551549
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
@@ -556,8 +554,11 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
556554
RecurKind RK = RdxDesc.getRecurrenceKind();
557555

558556
Type *PhiTy = OrigPhi->getType();
557+
// The recipe's operands are the reduction phi, followed by one operand for
558+
// each part of the reduction.
559+
unsigned NumParts = getNumOperands() - 1;
559560
VectorParts RdxParts(NumParts);
560-
for (unsigned Part = 0; Part != NumParts; ++Part)
561+
for (unsigned Part = 0; Part < NumParts; ++Part)
561562
RdxParts[Part] = State.get(getOperand(1 + Part), 0, PhiR->isInLoop());
562563

563564
// If the vector reduction can be performed in a smaller type, we truncate
@@ -688,6 +689,9 @@ bool VPInstruction::isSingleScalar() const {
688689
}
689690

690691
unsigned VPInstruction::getInterleaveCount() const {
692+
assert((getOpcode() == VPInstruction::CalculateTripCountMinusVF ||
693+
getOpcode() == VPInstruction::CanonicalIVIncrementForPart) &&
694+
"used with unexpected opcode");
691695
return getNumOperands() == 1
692696
? 1
693697
: cast<ConstantInt>(getOperand(1)->getLiveInIRValue())
@@ -1313,7 +1317,6 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
13131317

13141318
Value *SplatVF;
13151319
if (getNumOperands() == 4) {
1316-
// Need to create stuff in PH.
13171320
SplatVF = State.get(getOperand(2), 0);
13181321
} else {
13191322
// Multiply the vectorization factor by the step using integer or

0 commit comments

Comments
 (0)