-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Implement interleaving as VPlan-to-VPlan transform. #95842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0c3c293
cba8b59
f3e47f5
4abc317
9360440
cf7d783
41b7cc9
713eec1
ced94e8
6fd2416
548474c
ecdf378
4daee0a
23ac7f6
faf867c
26fc035
34595b8
e441720
a838eb4
1cd971c
2b0b1e9
470b374
7ff3b63
862121d
bb7ddcf
1a5113c
6ce7bf8
99bc59d
65150b5
d2073e5
2db5340
5de37ef
2e8535d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -532,6 +532,7 @@ class VPBlockBase { | |||||||
VPBlocksTy &getSuccessors() { return Successors; } | ||||||||
|
||||||||
iterator_range<VPBlockBase **> successors() { return Successors; } | ||||||||
iterator_range<VPBlockBase **> predecessors() { return Predecessors; } | ||||||||
|
||||||||
const VPBlocksTy &getPredecessors() const { return Predecessors; } | ||||||||
VPBlocksTy &getPredecessors() { return Predecessors; } | ||||||||
|
@@ -724,6 +725,11 @@ class VPLiveOut : public VPUser { | |||||||
|
||||||||
PHINode *getPhi() const { return Phi; } | ||||||||
|
||||||||
/// Live-outs are marked as only using the first part during the transition | ||||||||
/// to unrolling directly on VPlan. | ||||||||
/// TODO: Remove after unroller transition. | ||||||||
bool onlyFirstPartUsed(const VPValue *Op) const override { return true; } | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A live-out user can only use the first part - given that only a single part is available now? I.e., answer depends on interleaving taking place? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, mostly for the transition. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth a note. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||||||||
|
||||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||
/// Print the VPLiveOut to \p O. | ||||||||
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; | ||||||||
|
@@ -1226,11 +1232,24 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { | |||||||
#endif | ||||||||
}; | ||||||||
|
||||||||
/// Helper to access the operand that contains the unroll part for this recipe | ||||||||
/// after unrolling. | ||||||||
template <unsigned PartOpIdx> class VPUnrollPartAccessor { | ||||||||
protected: | ||||||||
/// Return the VPValue operand containing the unroll part or null if there is | ||||||||
/// no such operand. | ||||||||
VPValue *getUnrollPartOperand(VPUser &U) const; | ||||||||
|
||||||||
/// Return the unroll part. | ||||||||
unsigned getUnrollPart(VPUser &U) const; | ||||||||
}; | ||||||||
|
||||||||
/// This is a concrete Recipe that models a single VPlan-level instruction. | ||||||||
/// While, like any Recipe, it may generate a sequence of IR instructions when | ||||||||
/// executed, these instructions would always form a single-def expression as | ||||||||
/// the VPInstruction is also a single def-use vertex. | ||||||||
class VPInstruction : public VPRecipeWithIRFlags { | ||||||||
class VPInstruction : public VPRecipeWithIRFlags, | ||||||||
public VPUnrollPartAccessor<1> { | ||||||||
friend class VPlanSlp; | ||||||||
|
||||||||
public: | ||||||||
|
@@ -1764,7 +1783,8 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags { | |||||||
/// A recipe to compute the pointers for widened memory accesses of IndexTy for | ||||||||
/// all parts. If IsReverse is true, compute pointers for accessing the input in | ||||||||
/// reverse order per part. | ||||||||
class VPVectorPointerRecipe : public VPRecipeWithIRFlags { | ||||||||
class VPVectorPointerRecipe : public VPRecipeWithIRFlags, | ||||||||
public VPUnrollPartAccessor<1> { | ||||||||
Type *IndexedTy; | ||||||||
bool IsReverse; | ||||||||
|
||||||||
|
@@ -1789,7 +1809,7 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags { | |||||||
bool onlyFirstPartUsed(const VPValue *Op) const override { | ||||||||
assert(is_contained(operands(), Op) && | ||||||||
"Op must be an operand of the recipe"); | ||||||||
assert(getNumOperands() == 1 && "must have a single operand"); | ||||||||
assert(getNumOperands() <= 2 && "must have at most two operands"); | ||||||||
return true; | ||||||||
} | ||||||||
|
||||||||
|
@@ -1948,6 +1968,12 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { | |||||||
VPValue *getVFValue() { return getOperand(2); } | ||||||||
const VPValue *getVFValue() const { return getOperand(2); } | ||||||||
|
||||||||
VPValue *getSplatVFValue() { | ||||||||
// If the recipe has been unrolled (5 operands), return the VPValue for the | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
// induction increment. | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
return getNumOperands() == 5 ? getOperand(3) : nullptr; | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
suffice to ensure there's a 4th operand? |
||||||||
} | ||||||||
|
||||||||
/// Returns the first defined value as TruncInst, if it is one or nullptr | ||||||||
/// otherwise. | ||||||||
TruncInst *getTruncInst() { return Trunc; } | ||||||||
|
@@ -1967,9 +1993,17 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe { | |||||||
Type *getScalarType() const { | ||||||||
return Trunc ? Trunc->getType() : IV->getType(); | ||||||||
} | ||||||||
|
||||||||
/// Returns the VPValue representing the value of this induction at | ||||||||
/// the last unrolled part, if it exists. Returns itself if unrolling did not | ||||||||
/// take place. | ||||||||
VPValue *getLastUnrolledPartOperand() { | ||||||||
return getNumOperands() == 5 ? getOperand(4) : this; | ||||||||
} | ||||||||
Comment on lines
+1996
to
+2002
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better place this accessor for operand 4 above, after getSplatVFValue(), completing the accessors for operands 1,2 and 3. |
||||||||
}; | ||||||||
|
||||||||
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { | ||||||||
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe, | ||||||||
public VPUnrollPartAccessor<3> { | ||||||||
const InductionDescriptor &IndDesc; | ||||||||
|
||||||||
bool IsScalarAfterVectorization; | ||||||||
|
@@ -2006,6 +2040,13 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe { | |||||||
/// Returns the induction descriptor for the recipe. | ||||||||
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; } | ||||||||
|
||||||||
/// Returns the VPValue representing the value of this induction at | ||||||||
/// the first unrolled part, if it exists. Returns itself if unrolling did not | ||||||||
/// take place. | ||||||||
VPValue *getFirstUnrolledPartOperand() { | ||||||||
return getUnrollPart(*this) == 0 ? this : getOperand(2); | ||||||||
} | ||||||||
|
||||||||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||
/// Print the recipe. | ||||||||
void print(raw_ostream &O, const Twine &Indent, | ||||||||
|
@@ -2088,7 +2129,8 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { | |||||||
/// A recipe for handling reduction phis. The start value is the first operand | ||||||||
/// of the recipe and the incoming value from the backedge is the second | ||||||||
/// operand. | ||||||||
class VPReductionPHIRecipe : public VPHeaderPHIRecipe { | ||||||||
class VPReductionPHIRecipe : public VPHeaderPHIRecipe, | ||||||||
public VPUnrollPartAccessor<2> { | ||||||||
/// Descriptor for the reduction. | ||||||||
const RecurrenceDescriptor &RdxDesc; | ||||||||
|
||||||||
|
@@ -2907,7 +2949,10 @@ class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe { | |||||||
~VPActiveLaneMaskPHIRecipe() override = default; | ||||||||
|
||||||||
VPActiveLaneMaskPHIRecipe *clone() override { | ||||||||
return new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc()); | ||||||||
auto *R = new VPActiveLaneMaskPHIRecipe(getOperand(0), getDebugLoc()); | ||||||||
if (getNumOperands() == 2) | ||||||||
R->addOperand(getOperand(1)); | ||||||||
return R; | ||||||||
} | ||||||||
|
||||||||
VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC) | ||||||||
|
@@ -2966,7 +3011,8 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { | |||||||
}; | ||||||||
|
||||||||
/// A Recipe for widening the canonical induction variable of the vector loop. | ||||||||
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { | ||||||||
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe, | ||||||||
public VPUnrollPartAccessor<1> { | ||||||||
public: | ||||||||
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV) | ||||||||
: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {} | ||||||||
|
@@ -3052,7 +3098,8 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { | |||||||
|
||||||||
/// A recipe for handling phi nodes of integer and floating-point inductions, | ||||||||
/// producing their scalar values. | ||||||||
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags { | ||||||||
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags, | ||||||||
public VPUnrollPartAccessor<2> { | ||||||||
Instruction::BinaryOps InductionOpcode; | ||||||||
|
||||||||
public: | ||||||||
|
@@ -3548,6 +3595,11 @@ class VPlan { | |||||||
|
||||||||
bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); } | ||||||||
|
||||||||
unsigned getUF() const { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||||
assert(UFs.size() == 1 && "Expected a single UF"); | ||||||||
return UFs[0]; | ||||||||
} | ||||||||
|
||||||||
void setUF(unsigned UF) { | ||||||||
assert(hasUF(UF) && "Cannot set the UF not already in plan"); | ||||||||
UFs.clear(); | ||||||||
|
@@ -3732,6 +3784,22 @@ class VPBlockUtils { | |||||||
connectBlocks(BlockPtr, NewBlock); | ||||||||
} | ||||||||
|
||||||||
/// Insert disconnected block \p NewBlock before \p BlockPtr. First | ||||||||
/// disconnects all predecessors of \p BlockPtr and connects them to \p | ||||||||
/// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr as | ||||||||
/// successor of \p NewBlock. | ||||||||
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Deserves documentation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks! |
||||||||
assert(NewBlock->getSuccessors().empty() && | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Alternatively, one could insert after and swap the block contents, but these may be a series of recipes or a CFG of blocks... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Left as-is for now. |
||||||||
NewBlock->getPredecessors().empty() && | ||||||||
"Can't insert new block with predecessors or successors."); | ||||||||
NewBlock->setParent(BlockPtr->getParent()); | ||||||||
for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) { | ||||||||
disconnectBlocks(Pred, BlockPtr); | ||||||||
connectBlocks(Pred, NewBlock); | ||||||||
} | ||||||||
connectBlocks(NewBlock, BlockPtr); | ||||||||
} | ||||||||
|
||||||||
/// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p | ||||||||
/// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p | ||||||||
/// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A related thought: is InnerLoopUnroller still employed, when VF=1;UF>1, now that unrolling is implemented earlier in VPlan?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be removed as a follow-up. It doesn't really serve any purpose even without this change, as it just offers a constructor that doesn't take a VF.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, InnerLoopUnroller used to override a few methods, but indeed seems ready to be retired.