-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Implement VPlan-based cost model for VPReduction, VPExtendedReduction and VPMulAccumulateReduction. #113903
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 47 commits
33b1f60
68fbd70
c8c9d56
d29a118
e5b50f7
cc004ff
b5445ca
1df91d4
a0b2f30
46928bd
35abf19
453997e
fa4f476
86ad2d8
594f9e4
52369d0
abc08f3
729a70e
ea58282
1c22ce2
a987456
6c434c7
f4b1b78
bffcac5
da705f1
1dc279e
20ea82e
90f9ffa
99512fe
2e4014a
38dd924
602a5e4
1939d44
2ee6e76
d584fc1
21b33e6
ae371e5
0d7b7f3
e12bd04
4906637
ca5db10
2fbdc7c
38d83bf
3e2acad
d2a5a43
484f9cc
cd86af4
84f8a46
36e1032
2483a29
56dcd90
26d938a
b32538f
fd539f8
71c7401
7da7983
f4afc2c
7b25767
685f217
710df44
829cb2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7321,63 +7321,6 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, | |
} | ||
} | ||
|
||
// The legacy cost model has special logic to compute the cost of in-loop | ||
// reductions, which may be smaller than the sum of all instructions involved | ||
// in the reduction. | ||
// TODO: Switch to costing based on VPlan once the logic has been ported. | ||
for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) { | ||
if (ForceTargetInstructionCost.getNumOccurrences()) | ||
continue; | ||
|
||
if (!CM.isInLoopReduction(RedPhi)) | ||
continue; | ||
|
||
const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop); | ||
SetVector<Instruction *> ChainOpsAndOperands(ChainOps.begin(), | ||
ChainOps.end()); | ||
auto IsZExtOrSExt = [](const unsigned Opcode) -> bool { | ||
return Opcode == Instruction::ZExt || Opcode == Instruction::SExt; | ||
}; | ||
// Also include the operands of instructions in the chain, as the cost-model | ||
// may mark extends as free. | ||
// | ||
// For ARM, some of the instructions can be folded into the reduction | ||
// instruction. So we need to mark all folded instructions free. | ||
// For example: We can fold reduce(mul(ext(A), ext(B))) into one | ||
// instruction. | ||
for (auto *ChainOp : ChainOps) { | ||
for (Value *Op : ChainOp->operands()) { | ||
if (auto *I = dyn_cast<Instruction>(Op)) { | ||
ChainOpsAndOperands.insert(I); | ||
if (I->getOpcode() == Instruction::Mul) { | ||
auto *Ext0 = dyn_cast<Instruction>(I->getOperand(0)); | ||
auto *Ext1 = dyn_cast<Instruction>(I->getOperand(1)); | ||
if (Ext0 && IsZExtOrSExt(Ext0->getOpcode()) && Ext1 && | ||
Ext0->getOpcode() == Ext1->getOpcode()) { | ||
ChainOpsAndOperands.insert(Ext0); | ||
ChainOpsAndOperands.insert(Ext1); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Pre-compute the cost for I, if it has a reduction pattern cost. | ||
for (Instruction *I : ChainOpsAndOperands) { | ||
auto ReductionCost = | ||
CM.getReductionPatternCost(I, VF, toVectorTy(I->getType(), VF)); | ||
if (!ReductionCost) | ||
continue; | ||
|
||
assert(!CostCtx.SkipCostComputation.contains(I) && | ||
"reduction op visited multiple times"); | ||
CostCtx.SkipCostComputation.insert(I); | ||
LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF | ||
<< ":\n in-loop reduction " << *I << "\n"); | ||
Cost += *ReductionCost; | ||
} | ||
} | ||
|
||
// Pre-compute the costs for branches except for the backedge, as the number | ||
// of replicate regions in a VPlan may not directly match the number of | ||
// branches, which would lead to different decisions. | ||
|
@@ -9510,10 +9453,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
"entry block must be set to a VPRegionBlock having a non-empty entry " | ||
"VPBasicBlock"); | ||
|
||
for (ElementCount VF : Range) | ||
Plan->addVF(VF); | ||
Plan->setName("Initial VPlan"); | ||
|
||
// Update wide induction increments to use the same step as the corresponding | ||
// wide induction. This enables detecting induction increments directly in | ||
// VPlan and removes redundant splats. | ||
|
@@ -9549,6 +9488,21 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { | |
// Adjust the recipes for any inloop reductions. | ||
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start); | ||
|
||
// Transform recipes to abstract recipes if it is legal and beneficial and | ||
// clamp the range for better cost estimation. | ||
// TODO: Enable following transform when the EVL-version of extended-reduction | ||
// and mulacc-reduction are implemented. | ||
if (!CM.foldTailWithEVL()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we need to special case this for EVL? Shouldn't the cost-model tell us that the combined reductions aren't profitable? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not a cost-model issue; the EVL-recipe generation is simply not implemented yet. |
||
VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM, | ||
CM.CostKind); | ||
VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan, | ||
CostCtx, Range); | ||
} | ||
|
||
for (ElementCount VF : Range) | ||
Plan->addVF(VF); | ||
sdesmalen-arm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Plan->setName("Initial VPlan"); | ||
|
||
// Interleave memory: for each Interleave Group we marked earlier as relevant | ||
// for this VPlan, replace the Recipes widening its memory instructions with a | ||
// single VPInterleaveRecipe at its insertion point. | ||
|
Uh oh!
There was an error while loading. Please reload this page.