Skip to content

Commit ff7c797

Browse files
committed
!fixup, Remove computeCost() for new recipes.
1 parent 77da697 commit ff7c797

File tree

3 files changed

+16
-46
lines changed

3 files changed

+16
-46
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

+3-8
Original file line numberDiff line numberDiff line change
@@ -2586,6 +2586,7 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
25862586
// Not all WidenCastRecipes contain nneg flag. Need to transfer flags from
25872587
// the original recipe to prevent setting wrong flags.
25882588
transferFlags(*Ext);
2589+
setUnderlyingValue(R->getUnderlyingValue());
25892590
}
25902591

25912592
~VPExtendedReductionRecipe() override = default;
@@ -2601,10 +2602,6 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
26012602
"VPExtendedRecipe + VPReductionRecipe before execution.");
26022603
};
26032604

2604-
/// Return the cost of VPExtendedReductionRecipe.
2605-
InstructionCost computeCost(ElementCount VF,
2606-
VPCostContext &Ctx) const override;
2607-
26082605
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
26092606
/// Print the recipe.
26102607
void print(raw_ostream &O, const Twine &Indent,
@@ -2664,6 +2661,7 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
26642661
assert((ExtOp == Instruction::CastOps::ZExt ||
26652662
ExtOp == Instruction::CastOps::SExt) &&
26662663
"VPMulAccumulateReductionRecipe only support zext and sext.");
2664+
setUnderlyingValue(R->getUnderlyingValue());
26672665
// Only set the non-negative flag if the original recipe has one.
26682666
if (Ext0->hasNonNegFlag())
26692667
IsNonNeg = Ext0->isNonNeg();
@@ -2681,6 +2679,7 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
26812679
Instruction::Add &&
26822680
"The reduction instruction in MulAccumulateReductionRecipe must be "
26832681
"Add");
2682+
setUnderlyingValue(R->getUnderlyingValue());
26842683
}
26852684

26862685
~VPMulAccumulateReductionRecipe() override = default;
@@ -2699,10 +2698,6 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
26992698
"VPWidenRecipe + VPReductionRecipe before execution");
27002699
}
27012700

2702-
/// Return the cost of VPMulAccumulateReductionRecipe.
2703-
InstructionCost computeCost(ElementCount VF,
2704-
VPCostContext &Ctx) const override;
2705-
27062701
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
27072702
/// Print the recipe.
27082703
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+7-32
Original file line numberDiff line numberDiff line change
@@ -2488,49 +2488,24 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
24882488
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
24892489
unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
24902490
FastMathFlags FMFs = getFastMathFlags();
2491-
std::optional<FastMathFlags> OptionalFMF =
2492-
ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
24932491

24942492
// TODO: Support any-of reductions.
24952493
assert(
24962494
(!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
24972495
ForceTargetInstructionCost.getNumOccurrences() > 0) &&
24982496
"Any-of reduction not implemented in VPlan-based cost model currently.");
24992497

2498+
// Cost = Reduction cost + BinOp cost
2499+
InstructionCost Cost =
2500+
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
25002501
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
25012502
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
2502-
return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
2503+
return Cost +
2504+
Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
25032505
}
25042506

2505-
return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
2506-
Ctx.CostKind);
2507-
}
2508-
2509-
InstructionCost
2510-
VPExtendedReductionRecipe::computeCost(ElementCount VF,
2511-
VPCostContext &Ctx) const {
2512-
unsigned Opcode = RecurrenceDescriptor::getOpcode(getRecurrenceKind());
2513-
Type *RedTy = Ctx.Types.inferScalarType(this);
2514-
auto *SrcVecTy =
2515-
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
2516-
assert(RedTy->isIntegerTy() &&
2517-
"ExtendedReduction only support integer type currently.");
2518-
InstructionCost Cost = Ctx.TTI.getExtendedReductionCost(
2519-
Opcode, isZExt(), RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
2520-
// The cost of this recipe should be decided by the legacy model.
2521-
return Cost.isValid() ? 0 : Cost;
2522-
}
2523-
2524-
InstructionCost
2525-
VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
2526-
VPCostContext &Ctx) const {
2527-
Type *RedTy = Ctx.Types.inferScalarType(this);
2528-
auto *SrcVecTy =
2529-
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
2530-
InstructionCost Cost =
2531-
Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy, Ctx.CostKind);
2532-
// The cost of this recipe should be decided by the legacy model.
2533-
return Cost.isValid() ? 0 : Cost;
2507+
return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs,
2508+
Ctx.CostKind);
25342509
}
25352510

25362511
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,12 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re
283283
; CHECK-NEXT: <x1> vector loop: {
284284
; CHECK-NEXT: vector.body:
285285
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
286-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, vp<[[RDX_NEXT:%.+]]>
286+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
287287
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
288288
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
289289
; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx>
290290
; CHECK-NEXT: WIDEN ir<[[LOAD:%.+]]> = load vp<[[ADDR]]>
291-
; CHECK-NEXT: EXTENDED-REDUCE vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> extended to i64)
291+
; CHECK-NEXT: EXTENDED-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> extended to i64)
292292
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
293293
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
294294
; CHECK-NEXT: No successors
@@ -327,15 +327,15 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i
327327
; CHECK-NEXT: <x1> vector loop: {
328328
; CHECK-NEXT: vector.body:
329329
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
330-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, vp<[[RDX_NEXT:%.+]]>
330+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
331331
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
332332
; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
333333
; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
334334
; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]>
335335
; CHECK-NEXT: CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
336336
; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]>
337337
; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]>
338-
; CHECK-NEXT: MULACC-REDUCE vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>)
338+
; CHECK-NEXT: MULACC-REDUCE ir<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>)
339339
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
340340
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
341341
; CHECK-NEXT: No successors
@@ -376,15 +376,15 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado
376376
; CHECK-NEXT: <x1> vector loop: {
377377
; CHECK-NEXT: vector.body:
378378
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
379-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, vp<[[RDX_NEXT:%.+]]>
379+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
380380
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
381381
; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
382382
; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
383383
; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]>
384384
; CHECK-NEXT: CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
385385
; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]>
386386
; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]>
387-
; CHECK-NEXT: MULACC-REDUCE vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> extended to i64), (ir<[[LOAD1]]> extended to i64))
387+
; CHECK-NEXT: MULACC-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> extended to i64), (ir<[[LOAD1]]> extended to i64))
388388
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
389389
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
390390
; CHECK-NEXT: No successors

0 commit comments

Comments
 (0)