-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Expand VPWidenIntOrFpInductionRecipe into separate recipes #118638
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 22 commits
e72140a
6fc4a7c
e7f5a4e
31bdc7c
42dc912
6839647
6c659ca
1cd6c81
dcc6640
ca81015
8b77af4
5ad4cc1
d183c91
79b9700
d3d85f6
8f94610
dff84ec
5457030
68b6b66
8257a57
0f8b4f6
7fc4859
61bd641
ec5fe59
466ed14
b315afb
993ba23
894bde0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1874,12 +1874,13 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe { | |
}; | ||
|
||
/// A recipe for handling phi nodes of integer and floating-point inductions, | ||
/// producing their vector values. | ||
/// producing their vector values. This won't execute any LLVM IR and will get | ||
lukel97 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
/// expanded later into several other recipes in convertToConcreteRecipes. | ||
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { | ||
TruncInst *Trunc; | ||
|
||
// If this recipe is unrolled it will have 2 additional operands. | ||
bool isUnrolled() const { return getNumOperands() == 6; } | ||
bool isUnrolled() const { return getNumOperands() == 5; } | ||
|
||
public: | ||
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, | ||
|
@@ -1915,9 +1916,10 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { | |
|
||
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) | ||
|
||
/// Generate the vectorized and scalarized versions of the phi node as | ||
/// needed by their users. | ||
void execute(VPTransformState &State) override; | ||
void execute(VPTransformState &State) override { | ||
llvm_unreachable("cannot execute this recipe, should be expanded via " | ||
"expandVPWidenIntOrFpInductionRecipe"); | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
|
@@ -1928,16 +1930,6 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { | |
VPValue *getVFValue() { return getOperand(2); } | ||
const VPValue *getVFValue() const { return getOperand(2); } | ||
|
||
// TODO: Remove once VPWidenIntOrFpInduction is fully expanded in | ||
// convertToConcreteRecipes. | ||
VPInstructionWithType *getStepVector() { | ||
auto *StepVector = | ||
cast<VPInstructionWithType>(getOperand(3)->getDefiningRecipe()); | ||
assert(StepVector->getOpcode() == VPInstruction::StepVector && | ||
"step vector operand must be a VPInstruction::StepVector"); | ||
return StepVector; | ||
} | ||
|
||
VPValue *getSplatVFValue() { | ||
// If the recipe has been unrolled return the VPValue for the induction | ||
// increment. | ||
|
@@ -2026,7 +2018,7 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors { | |
public: | ||
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and | ||
/// debug location \p DL. | ||
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, | ||
VPWidenPHIRecipe(Instruction *Phi, VPValue *Start = nullptr, DebugLoc DL = {}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A VPWidenIntOrFpInduction's underlying value can be a trunc instruction too, but now that you mention it I think it's ok if we just pass |
||
const Twine &Name = "") | ||
: VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL), | ||
Name(Name.str()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -966,6 +966,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case VPInstruction::BranchOnCount: | ||
case VPInstruction::BranchOnCond: | ||
case VPInstruction::ResumePhi: | ||
case VPInstruction::Broadcast: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was needed since we're now using VPInstruction::Broadcasts for arbitrary VPValues |
||
return true; | ||
case VPInstruction::PtrAdd: | ||
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); | ||
|
@@ -1086,15 +1087,14 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
|
||
void VPInstructionWithType::execute(VPTransformState &State) { | ||
State.setDebugLocFrom(getDebugLoc()); | ||
switch (getOpcode()) { | ||
case Instruction::ZExt: | ||
case Instruction::Trunc: { | ||
if (isScalarCast()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Needed because one of the recipes expanded to is Instruction::UIToFP. Happy to split this off to an NFC if wanted. |
||
Value *Op = State.get(getOperand(0), VPLane(0)); | ||
Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()), | ||
Op, ResultTy); | ||
State.set(this, Cast, VPLane(0)); | ||
break; | ||
return; | ||
} | ||
switch (getOpcode()) { | ||
case VPInstruction::StepVector: { | ||
Value *StepVector = | ||
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF)); | ||
|
@@ -1884,149 +1884,13 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, | |
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind); | ||
} | ||
|
||
/// This function adds | ||
/// (0 * Step, 1 * Step, 2 * Step, ...) | ||
/// to each vector element of Val. | ||
/// \p Opcode is relevant for FP induction variable. | ||
/// \p InitVec is an integer step vector from 0 with a step of 1. | ||
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec, | ||
Instruction::BinaryOps BinOp, ElementCount VF, | ||
IRBuilderBase &Builder) { | ||
assert(VF.isVector() && "only vector VFs are supported"); | ||
|
||
// Create and check the types. | ||
auto *ValVTy = cast<VectorType>(Val->getType()); | ||
ElementCount VLen = ValVTy->getElementCount(); | ||
|
||
Type *STy = Val->getType()->getScalarType(); | ||
assert((STy->isIntegerTy() || STy->isFloatingPointTy()) && | ||
"Induction Step must be an integer or FP"); | ||
assert(Step->getType() == STy && "Step has wrong type"); | ||
|
||
if (STy->isIntegerTy()) { | ||
Step = Builder.CreateVectorSplat(VLen, Step); | ||
assert(Step->getType() == Val->getType() && "Invalid step vec"); | ||
// FIXME: The newly created binary instructions should contain nsw/nuw | ||
// flags, which can be found from the original scalar operations. | ||
Step = Builder.CreateMul(InitVec, Step); | ||
return Builder.CreateAdd(Val, Step, "induction"); | ||
} | ||
|
||
// Floating point induction. | ||
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) && | ||
"Binary Opcode should be specified for FP induction"); | ||
InitVec = Builder.CreateUIToFP(InitVec, ValVTy); | ||
|
||
Step = Builder.CreateVectorSplat(VLen, Step); | ||
Value *MulOp = Builder.CreateFMul(InitVec, Step); | ||
return Builder.CreateBinOp(BinOp, Val, MulOp, "induction"); | ||
} | ||
|
||
/// A helper function that returns an integer or floating-point constant with | ||
/// value C. | ||
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { | ||
return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C) | ||
: ConstantFP::get(Ty, C); | ||
} | ||
|
||
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) { | ||
assert(!State.Lane && "Int or FP induction being replicated."); | ||
|
||
Value *Start = getStartValue()->getLiveInIRValue(); | ||
const InductionDescriptor &ID = getInductionDescriptor(); | ||
TruncInst *Trunc = getTruncInst(); | ||
IRBuilderBase &Builder = State.Builder; | ||
assert(getPHINode()->getType() == ID.getStartValue()->getType() && | ||
"Types must match"); | ||
assert(State.VF.isVector() && "must have vector VF"); | ||
|
||
// The value from the original loop to which we are mapping the new induction | ||
// variable. | ||
Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : getPHINode(); | ||
|
||
// Fast-math-flags propagate from the original induction instruction. | ||
IRBuilder<>::FastMathFlagGuard FMFG(Builder); | ||
if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp())) | ||
Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags()); | ||
|
||
// Now do the actual transformations, and start with fetching the step value. | ||
Value *Step = State.get(getStepValue(), VPLane(0)); | ||
|
||
assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) && | ||
"Expected either an induction phi-node or a truncate of it!"); | ||
|
||
// Construct the initial value of the vector IV in the vector loop preheader | ||
auto CurrIP = Builder.saveIP(); | ||
BasicBlock *VectorPH = | ||
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0)); | ||
Builder.SetInsertPoint(VectorPH->getTerminator()); | ||
if (isa<TruncInst>(EntryVal)) { | ||
assert(Start->getType()->isIntegerTy() && | ||
"Truncation requires an integer type"); | ||
auto *TruncType = cast<IntegerType>(EntryVal->getType()); | ||
Step = Builder.CreateTrunc(Step, TruncType); | ||
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); | ||
} | ||
|
||
Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start); | ||
Value *SteppedStart = | ||
::getStepVector(SplatStart, Step, State.get(getStepVector()), | ||
ID.getInductionOpcode(), State.VF, State.Builder); | ||
|
||
// We create vector phi nodes for both integer and floating-point induction | ||
// variables. Here, we determine the kind of arithmetic we will perform. | ||
Instruction::BinaryOps AddOp; | ||
Instruction::BinaryOps MulOp; | ||
if (Step->getType()->isIntegerTy()) { | ||
AddOp = Instruction::Add; | ||
MulOp = Instruction::Mul; | ||
} else { | ||
AddOp = ID.getInductionOpcode(); | ||
MulOp = Instruction::FMul; | ||
} | ||
|
||
Value *SplatVF; | ||
if (VPValue *SplatVFOperand = getSplatVFValue()) { | ||
// The recipe has been unrolled. In that case, fetch the splat value for the | ||
// induction increment. | ||
SplatVF = State.get(SplatVFOperand); | ||
} else { | ||
// Multiply the vectorization factor by the step using integer or | ||
// floating-point arithmetic as appropriate. | ||
Type *StepType = Step->getType(); | ||
Value *RuntimeVF = State.get(getVFValue(), VPLane(0)); | ||
if (Step->getType()->isFloatingPointTy()) | ||
RuntimeVF = Builder.CreateUIToFP(RuntimeVF, StepType); | ||
else | ||
RuntimeVF = Builder.CreateZExtOrTrunc(RuntimeVF, StepType); | ||
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF); | ||
|
||
// Create a vector splat to use in the induction update. | ||
SplatVF = Builder.CreateVectorSplat(State.VF, Mul); | ||
} | ||
|
||
Builder.restoreIP(CurrIP); | ||
|
||
// We may need to add the step a number of times, depending on the unroll | ||
// factor. The last of those goes into the PHI. | ||
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind"); | ||
VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt()); | ||
VecInd->setDebugLoc(getDebugLoc()); | ||
State.set(this, VecInd); | ||
|
||
Instruction *LastInduction = cast<Instruction>( | ||
Builder.CreateBinOp(AddOp, VecInd, SplatVF, "vec.ind.next")); | ||
LastInduction->setDebugLoc(getDebugLoc()); | ||
|
||
VecInd->addIncoming(SteppedStart, VectorPH); | ||
// Add induction update using an incorrect block temporarily. The phi node | ||
// will be fixed after VPlan execution. Note that at this point the latch | ||
// block cannot be used, as it does not exist yet. | ||
// TODO: Model increment value in VPlan, by turning the recipe into a | ||
// multi-def and a subclass of VPHeaderPHIRecipe. | ||
Comment on lines
-2067
to
-2068
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we first have a separate patch focused specifically on completing this TODO? If I understand correctly, this seems to be the primary issue at hand. Additionally, I’d like to understand the difference between introducing a new recipe, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The VPHeaderPHIRecipe part of this todo is already done, but I'm not sure if the part about turning this into a multi-def will fix the issue. If I'm understanding this correctly, if we had a multi-def recipe we would still be executing the increment/backedge value in the header before the rest of the loop body, and so we wouldn't be able to use the EVL in it.
I actually initially tried to avoid the new recipes and just use VPInstructions but I struggled with trying to splat two scalar VPInstructions into a vector operand. But actually now that I realise that #82021 does it I'll give it another try! I would prefer to avoid adding extra recipes if possible, I don't see any particular advantage to having explicit recipes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, in general at this late stage there's less advantage from specialized recipes, better to use simpler recipes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, in general at this late stage there's less advantage from specialized recipes, better to use simpler recipes |
||
VecInd->addIncoming(LastInduction, VectorPH); | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const { | ||
|
@@ -3793,9 +3657,6 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, | |
#endif | ||
|
||
void VPWidenPHIRecipe::execute(VPTransformState &State) { | ||
assert(EnableVPlanNativePath && | ||
"Non-native vplans are not expected to have VPWidenPHIRecipes."); | ||
|
||
Value *Op0 = State.get(getOperand(0)); | ||
Type *VecTy = Op0->getType(); | ||
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needed because we're now using VPWidenPHI on the non-vplan-native path. Happy to split this off into an NFC as well if wanted