@@ -2012,6 +2012,30 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
2012
2012
State.set (this , NewRed, /* IsScalar*/ true );
2013
2013
}
2014
2014
2015
+ InstructionCost VPReductionRecipe::computeCost (ElementCount VF,
2016
+ VPCostContext &Ctx) const {
2017
+ RecurKind RdxKind = RdxDesc.getRecurrenceKind ();
2018
+ Type *ElementTy = RdxDesc.getRecurrenceType ();
2019
+ auto *VectorTy = dyn_cast<VectorType>(ToVectorTy (ElementTy, VF));
2020
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2021
+ unsigned Opcode = RdxDesc.getOpcode ();
2022
+
2023
+ if (VectorTy == nullptr )
2024
+ return InstructionCost::getInvalid ();
2025
+
2026
+ // Cost = Reduction cost + BinOp cost
2027
+ InstructionCost Cost =
2028
+ Ctx.TTI .getArithmeticInstrCost (Opcode, ElementTy, CostKind);
2029
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind (RdxKind)) {
2030
+ Intrinsic::ID Id = getMinMaxReductionIntrinsicOp (RdxKind);
2031
+ return Cost + Ctx.TTI .getMinMaxReductionCost (
2032
+ Id, VectorTy, RdxDesc.getFastMathFlags (), CostKind);
2033
+ }
2034
+
2035
+ return Cost + Ctx.TTI .getArithmeticReductionCost (
2036
+ Opcode, VectorTy, RdxDesc.getFastMathFlags (), CostKind);
2037
+ }
2038
+
2015
2039
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2016
2040
void VPReductionRecipe::print (raw_ostream &O, const Twine &Indent,
2017
2041
VPSlotTracker &SlotTracker) const {
0 commit comments