Skip to content

Commit ce95f18

Browse files
committed
!fixup, Remove computeCost() for new recipes.
1 parent a4077bc commit ce95f18

File tree

3 files changed

+16
-46
lines changed

3 files changed

+16
-46
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2642,6 +2642,7 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
26422642
// Not all WidenCastRecipes contain the nneg flag. Need to transfer flags from
26432643
// the original recipe to prevent setting wrong flags.
26442644
transferFlags(*Ext);
2645+
setUnderlyingValue(R->getUnderlyingValue());
26452646
}
26462647

26472648
~VPExtendedReductionRecipe() override = default;
@@ -2657,10 +2658,6 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
26572658
"VPExtendedRecipe + VPReductionRecipe before execution.");
26582659
};
26592660

2660-
/// Return the cost of VPExtendedReductionRecipe.
2661-
InstructionCost computeCost(ElementCount VF,
2662-
VPCostContext &Ctx) const override;
2663-
26642661
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
26652662
/// Print the recipe.
26662663
void print(raw_ostream &O, const Twine &Indent,
@@ -2720,6 +2717,7 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
27202717
assert((ExtOp == Instruction::CastOps::ZExt ||
27212718
ExtOp == Instruction::CastOps::SExt) &&
27222719
"VPMulAccumulateReductionRecipe only support zext and sext.");
2720+
setUnderlyingValue(R->getUnderlyingValue());
27232721
// Only set the non-negative flag if the original recipe contains one.
27242722
if (Ext0->hasNonNegFlag())
27252723
IsNonNeg = Ext0->isNonNeg();
@@ -2737,6 +2735,7 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
27372735
Instruction::Add &&
27382736
"The reduction instruction in MulAccumulateReductionRecipe must be "
27392737
"Add");
2738+
setUnderlyingValue(R->getUnderlyingValue());
27402739
}
27412740

27422741
~VPMulAccumulateReductionRecipe() override = default;
@@ -2755,10 +2754,6 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
27552754
"VPWidenRecipe + VPReductionRecipe before execution");
27562755
}
27572756

2758-
/// Return the cost of VPMulAccumulateReductionRecipe.
2759-
InstructionCost computeCost(ElementCount VF,
2760-
VPCostContext &Ctx) const override;
2761-
27622757
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
27632758
/// Print the recipe.
27642759
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2527,49 +2527,24 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
25272527
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
25282528
unsigned Opcode = RecurrenceDescriptor::getOpcode(RdxKind);
25292529
FastMathFlags FMFs = getFastMathFlags();
2530-
std::optional<FastMathFlags> OptionalFMF =
2531-
ElementTy->isFloatingPointTy() ? std::make_optional(FMFs) : std::nullopt;
25322530

25332531
// TODO: Support any-of reductions.
25342532
assert(
25352533
(!RecurrenceDescriptor::isAnyOfRecurrenceKind(RdxKind) ||
25362534
ForceTargetInstructionCost.getNumOccurrences() > 0) &&
25372535
"Any-of reduction not implemented in VPlan-based cost model currently.");
25382536

2537+
// Cost = Reduction cost + BinOp cost
2538+
InstructionCost Cost =
2539+
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
25392540
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
25402541
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
2541-
return Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
2542+
return Cost +
2543+
Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
25422544
}
25432545

2544-
return Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, OptionalFMF,
2545-
Ctx.CostKind);
2546-
}
2547-
2548-
InstructionCost
2549-
VPExtendedReductionRecipe::computeCost(ElementCount VF,
2550-
VPCostContext &Ctx) const {
2551-
unsigned Opcode = RecurrenceDescriptor::getOpcode(getRecurrenceKind());
2552-
Type *RedTy = Ctx.Types.inferScalarType(this);
2553-
auto *SrcVecTy =
2554-
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp()), VF));
2555-
assert(RedTy->isIntegerTy() &&
2556-
"ExtendedReduction only support integer type currently.");
2557-
InstructionCost Cost = Ctx.TTI.getExtendedReductionCost(
2558-
Opcode, isZExt(), RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
2559-
// The cost of this recipe should be decided by the legacy model.
2560-
return Cost.isValid() ? 0 : Cost;
2561-
}
2562-
2563-
InstructionCost
2564-
VPMulAccumulateReductionRecipe::computeCost(ElementCount VF,
2565-
VPCostContext &Ctx) const {
2566-
Type *RedTy = Ctx.Types.inferScalarType(this);
2567-
auto *SrcVecTy =
2568-
cast<VectorType>(toVectorTy(Ctx.Types.inferScalarType(getVecOp0()), VF));
2569-
InstructionCost Cost =
2570-
Ctx.TTI.getMulAccReductionCost(isZExt(), RedTy, SrcVecTy, Ctx.CostKind);
2571-
// The cost of this recipe should be decided by the legacy model.
2572-
return Cost.isValid() ? 0 : Cost;
2546+
return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs,
2547+
Ctx.CostKind);
25732548
}
25742549

25752550
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,12 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re
283283
; CHECK-NEXT: <x1> vector loop: {
284284
; CHECK-NEXT: vector.body:
285285
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
286-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, vp<[[RDX_NEXT:%.+]]>
286+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
287287
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
288288
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
289289
; CHECK-NEXT: vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx>
290290
; CHECK-NEXT: WIDEN ir<[[LOAD:%.+]]> = load vp<[[ADDR]]>
291-
; CHECK-NEXT: EXTENDED-REDUCE vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> extended to i64)
291+
; CHECK-NEXT: EXTENDED-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> extended to i64)
292292
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
293293
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
294294
; CHECK-NEXT: No successors
@@ -327,15 +327,15 @@ define i64 @print_mulacc(ptr nocapture readonly %x, ptr nocapture readonly %y, i
327327
; CHECK-NEXT: <x1> vector loop: {
328328
; CHECK-NEXT: vector.body:
329329
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
330-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, vp<[[RDX_NEXT:%.+]]>
330+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
331331
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
332332
; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
333333
; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
334334
; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]>
335335
; CHECK-NEXT: CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
336336
; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]>
337337
; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]>
338-
; CHECK-NEXT: MULACC-REDUCE vp<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>)
338+
; CHECK-NEXT: MULACC-REDUCE ir<[[RDX_NEXT]]> = ir<[[RDX]]> + reduce.add (mul nsw ir<[[LOAD0]]>, ir<[[LOAD1]]>)
339339
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
340340
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
341341
; CHECK-NEXT: No successors
@@ -376,15 +376,15 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado
376376
; CHECK-NEXT: <x1> vector loop: {
377377
; CHECK-NEXT: vector.body:
378378
; CHECK-NEXT: EMIT vp<[[IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[IV_NEXT:%.+]]>
379-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, vp<[[RDX_NEXT:%.+]]>
379+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[RDX:%.+]]> = phi ir<0>, ir<[[RDX_NEXT:%.+]]>
380380
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
381381
; CHECK-NEXT: CLONE ir<[[ARRAYIDX0:%.+]]> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
382382
; CHECK-NEXT: vp<[[ADDR0:%.+]]> = vector-pointer ir<[[ARRAYIDX0]]>
383383
; CHECK-NEXT: WIDEN ir<[[LOAD0:%.+]]> = load vp<[[ADDR0]]>
384384
; CHECK-NEXT: CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
385385
; CHECK-NEXT: vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]>
386386
; CHECK-NEXT: WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]>
387-
; CHECK-NEXT: MULACC-REDUCE vp<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> extended to i64), (ir<[[LOAD1]]> extended to i64))
387+
; CHECK-NEXT: MULACC-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> extended to i64), (ir<[[LOAD1]]> extended to i64))
388388
; CHECK-NEXT: EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
389389
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
390390
; CHECK-NEXT: No successors

0 commit comments

Comments
 (0)