-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Implement VPlan-based cost model for VPReduction, VPExtendedReduction and VPMulAccumulateReduction. #113903
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 58 commits
33b1f60
68fbd70
c8c9d56
d29a118
e5b50f7
cc004ff
b5445ca
1df91d4
a0b2f30
46928bd
35abf19
453997e
fa4f476
86ad2d8
594f9e4
52369d0
abc08f3
729a70e
ea58282
1c22ce2
a987456
6c434c7
f4b1b78
bffcac5
da705f1
1dc279e
20ea82e
90f9ffa
99512fe
2e4014a
38dd924
602a5e4
1939d44
2ee6e76
d584fc1
21b33e6
ae371e5
0d7b7f3
e12bd04
4906637
ca5db10
2fbdc7c
38d83bf
3e2acad
d2a5a43
484f9cc
cd86af4
84f8a46
36e1032
2483a29
56dcd90
26d938a
b32538f
fd539f8
71c7401
7da7983
f4afc2c
7b25767
685f217
710df44
829cb2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -782,19 +782,25 @@ Value *VPInstruction::generate(VPTransformState &State) { | |
InstructionCost VPInstruction::computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const { | ||
if (Instruction::isBinaryOp(getOpcode())) { | ||
|
||
Type *ResTy = Ctx.Types.inferScalarType(this); | ||
if (!vputils::onlyFirstLaneUsed(this)) | ||
ResTy = toVectorTy(ResTy, VF); | ||
|
||
if (!getUnderlyingValue()) { | ||
// TODO: Compute cost for VPInstructions without underlying values once | ||
// the legacy cost model has been retired. | ||
return 0; | ||
switch (getOpcode()) { | ||
case Instruction::FMul: | ||
return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); | ||
default: | ||
// TODO: Compute cost for VPInstructions without underlying values once | ||
// the legacy cost model has been retired. | ||
return 0; | ||
} | ||
ElvisWang123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
assert(!doesGeneratePerAllLanes() && | ||
"Should only generate a vector value or single scalar, not scalars " | ||
"for all lanes."); | ||
Type *ResTy = Ctx.Types.inferScalarType(this); | ||
if (!vputils::onlyFirstLaneUsed(this)) | ||
ResTy = toVectorTy(ResTy, VF); | ||
|
||
return Ctx.TTI.getArithmeticInstrCost(getOpcode(), ResTy, Ctx.CostKind); | ||
} | ||
|
||
|
@@ -2527,24 +2533,47 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF, | |
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF)); | ||
unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind); | ||
FastMathFlags FMFs = getFastMathFlags(); | ||
std::optional<FastMathFlags> OptionalFMF = | ||
ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt; | ||
|
||
// TODO: Support any-of reductions. | ||
assert( | ||
(!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) || | ||
ForceTargetInstructionCost.getNumOccurrences() > 0) && | ||
"Any-of reduction not implemented in VPlan-based cost model currently."); | ||
|
||
// Cost = Reduction cost + BinOp cost | ||
InstructionCost Cost = | ||
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind); | ||
// Note: the reduction cost returned by TTI is expected to already include both | ||
// the BinOp cost and the cost of moving the result to a scalar register. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you rephrase this a bit to make it clearer what this note is about? Below we only call |
||
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) { | ||
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind); | ||
return Cost + | ||
Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind); | ||
Comment on lines
-2535
to
-2536
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind); | ||
} | ||
|
||
return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs, | ||
Ctx.CostKind); | ||
return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF, | ||
Ctx.CostKind); | ||
} | ||
|
||
// Returns the cost of this extended-reduction recipe for vectorization factor | ||
// \p VF by delegating to TTI.getExtendedReductionCost(). | ||
// The query is built from the reduction opcode of the recurrence kind, the | ||
// isZExt() flag, the scalar result type inferred for this recipe, and the | ||
// vector operand's scalar type widened to \p VF. | ||
InstructionCost | ||
VPExtendedReductionRecipe::computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const { | ||
unsigned Opcode = RecurrenceDescriptor::getOpcode(getRecurrenceKind()); | ||
Type *RedTy = Ctx.Types.inferScalarType(this); | ||
// Source type is the (pre-extension) vector operand's scalar type, widened | ||
// to a vector of VF elements. | ||
auto *SrcVecTy = | ||
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF)); | ||
// Only integer result types are accepted here. | ||
assert(RedTy->isIntegerTy() && | ||
"ExtendedReduction only support integer type currently."); | ||
// NOTE(review): std::nullopt is presumably the optional fast-math-flags | ||
// argument, unused for integer reductions -- confirm against the TTI API. | ||
return Ctx.TTI.getExtendedReductionCost(Opcode, isZExt(), RedTy, SrcVecTy, | ||
std::nullopt, Ctx.CostKind); | ||
} | ||
|
||
// Returns the cost of this multiply-accumulate reduction recipe for | ||
// vectorization factor \p VF by delegating to TTI.getMulAccReductionCost(). | ||
// The query is built from the isZExt() flag, the scalar result type inferred | ||
// for this recipe, and the first vector operand's scalar type widened to | ||
// \p VF. | ||
InstructionCost | ||
VPMulAccumulateReductionRecipe::computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const { | ||
Type *RedTy = Ctx.Types.inferScalarType(this); | ||
// NOTE(review): only getVecOp0() is consulted for the source type; this | ||
// presumably relies on both multiply operands sharing a type -- confirm. | ||
auto *SrcVecTy = | ||
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF)); | ||
return Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy, | ||
Ctx.CostKind); | ||
} | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.