Skip to content

[LV] Use frozen start value for FindLastIV if needed. #132691

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 4, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3e42683
[VPlan] Add ComputeFindLastIVResult opcode (NFC).
fhahn Mar 23, 2025
c551166
[VPlan] Manage FindLastIV start value in ComputeFindLastIVResult (NFC).
fhahn Mar 23, 2025
4591537
Match
fhahn Mar 23, 2025
ff11744
[LV] Use frozen start value for FindLastIV if needed.
fhahn Mar 22, 2025
ab4681a
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Mar 31, 2025
691d8c3
!fixup address latest comments, thanks
fhahn Mar 31, 2025
cc449a8
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Mar 31, 2025
ab70b60
!fixup limit to epilogue vectorization.
fhahn Mar 31, 2025
10d7b09
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Mar 31, 2025
4e2b58d
!fixup limit to epilogue
fhahn Mar 31, 2025
4fc8fe6
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Mar 31, 2025
094607c
!fixup update tests, unify code.
fhahn Mar 31, 2025
6554b64
!fixup fix formatting.
fhahn Mar 31, 2025
4a8163c
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Apr 1, 2025
099676f
!fixup remove unneeded changes.
fhahn Apr 1, 2025
5979201
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Apr 1, 2025
14362e3
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Apr 2, 2025
3f3306b
!fixup address latest comments, thanks!
fhahn Apr 2, 2025
50598f1
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Apr 2, 2025
db60a65
@fixup remove replaceUsesOfWith again.
fhahn Apr 2, 2025
177002a
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Apr 3, 2025
557a5b9
Merge remote-tracking branch 'origin/main' into findlastiv-poison-safe
fhahn Apr 3, 2025
3df74b7
!fixup address latest comments, reorder to handle more IVs.
fhahn Apr 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@ Value *createAnyOfReduction(IRBuilderBase &B, Value *Src,
/// Create a reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IFindLastIV or RecurKind::FFindLastIV. The reduction
/// operation is described by \p Desc.
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src, Value *Start,
const RecurrenceDescriptor &Desc);

/// Create an ordered reduction intrinsic using the given recurrence
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1233,11 +1233,11 @@ Value *llvm::createAnyOfReduction(IRBuilderBase &Builder, Value *Src,
}

Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
Value *Start,
const RecurrenceDescriptor &Desc) {
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
Desc.getRecurrenceKind()) &&
"Unexpected reduction kind");
Value *StartVal = Desc.getRecurrenceStartValue();
Value *Sentinel = Desc.getSentinelValue();
Value *MaxRdx = Src->getType()->isVectorTy()
? Builder.CreateIntMaxReduce(Src, true)
Expand All @@ -1246,7 +1246,7 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
// reduction is sentinel value.
Value *Cmp =
Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
return Builder.CreateSelect(Cmp, MaxRdx, StartVal, "rdx.select");
return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
}

Value *llvm::getReductionIdentity(Intrinsic::ID RdxID, Type *Ty,
Expand Down
104 changes: 74 additions & 30 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7612,7 +7612,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
(EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;

auto *EpiRedHeaderPhi =
Expand All @@ -7633,14 +7634,17 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
using namespace llvm::PatternMatch;
Value *Cmp, *OrigResumeV;
Value *Cmp, *OrigResumeV, *CmpOp;
bool IsExpectedPattern =
match(MainResumeValue, m_Select(m_OneUse(m_Value(Cmp)),
m_Specific(RdxDesc.getSentinelValue()),
m_Value(OrigResumeV))) &&
match(Cmp,
m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
m_Specific(RdxDesc.getRecurrenceStartValue())));
(match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
m_Value(CmpOp))) &&
(match(CmpOp,
m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) ||
(CmpOp == RdxDesc.getRecurrenceStartValue() &&
isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
(void)IsExpectedPattern;
MainResumeValue = OrigResumeV;
Expand Down Expand Up @@ -9817,8 +9821,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createSelect(Cond, OrigExitingVPV, PhiR, {}, "", FMFs);
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeReductionResult;
(cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeReductionResult ||
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeFindLastIVResult);
});
if (CM.usePredicatedReductionSelect(
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy))
Expand Down Expand Up @@ -9861,10 +9867,36 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// bc.merge.rdx phi nodes, hence it needs to be created unconditionally here
// even for in-loop reductions, until the reduction resume value handling is
// also modeled in VPlan.
VPInstruction *FinalReductionResult;
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(MiddleVPBB, IP);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why has this been sunk into the if/else cases when we are inserting into the same location in each case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restored, this was left over after moving the code

auto *FinalReductionResult = Builder.createNaryOp(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
VPValue *Start = PhiR->getStartValue();
if (!isGuaranteedNotToBeUndefOrPoison(
PhiR->getStartValue()->getLiveInIRValue())) {
Builder.setInsertPoint(cast<VPBasicBlock>(Plan->getEntry()));
Start = Builder.createNaryOp(Instruction::Freeze, {Start}, {}, "fr");
}
Builder.setInsertPoint(MiddleVPBB, IP);
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
// Update all users outside the vector region.
for (VPUser *U : to_vector(OrigExitingVPV->users())) {
auto *R = cast<VPRecipeBase>(U);
if (R->getParent() && R->getParent()->getParent())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks to be based on the assumption that the parent's parent is a vector loop region. Is it worth asserting this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to check that the parent's region is the vector loop region, thanks

continue;

for (unsigned Idx = 0; Idx != R->getNumOperands(); ++Idx) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a commonly-used idiom, something we probably have an equivalent IR helper for. Is it worth adding a helper in VPUser?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added VPUser::replaceUsesOfWith in line with User::replaceUsesOfWith

if (R->getOperand(Idx) == PhiR->getStartValue())
R->setOperand(Idx, Start);
}
}
} else {
Builder.setInsertPoint(MiddleVPBB, IP);
FinalReductionResult = Builder.createNaryOp(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
}
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult, [FinalReductionResult](VPUser &User, unsigned) {
Expand Down Expand Up @@ -10353,24 +10385,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");

// Re-use the trip count and steps expanded for the main loop, as
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess in theory this code could be moved to the end of the function in a separate NFC patch, with this patch then adding the new bits:

    auto *VPI = dyn_cast<VPInstruction>(&R);
    if (VPI) {
      VPI->replaceAllUsesWith(Plan.getOrAddLiveIn(
          ToFrozen[VPI->getOperand(0)->getLiveInIRValue()]));
      continue;
    }

However, I don't want to create unnecessary burden as I know this is an important fix. I'll leave it up to you!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left it included in the patch for now, could also split it off.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine. 👍

// skeleton creation needs it as a value that dominates both the scalar
// and vector epilogue loops
// TODO: This is a workaround needed for epilogue vectorization and it
// should be removed once induction resume value creation is done
// directly in VPlan.
for (auto &R : make_early_inc_range(*Plan.getEntry())) {
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpandR)
continue;
auto *ExpandedVal =
Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second);
ExpandR->replaceAllUsesWith(ExpandedVal);
if (Plan.getTripCount() == ExpandR)
Plan.resetTripCount(ExpandedVal);
ExpandR->eraseFromParent();
}

DenseMap<Value *, Value *> ToFrozen;
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
for (VPRecipeBase &R : Header->phis()) {
Expand Down Expand Up @@ -10436,6 +10451,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
ResumeV =
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
ToFrozen[RdxDesc.getRecurrenceStartValue()] =
cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This confused me at first because there is no obvious indication that EPI.MainLoopIterationCountCheck is a Block pointer. Doesn't need doing in this PR, but it would be good to rename MainLoopIterationCountCheck to something like MainLoopIterationCountCheckBlock.


// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
// to the resume value. The resume value is adjusted to the sentinel
// value when the final value from the main vector loop equals the start
Expand All @@ -10444,8 +10463,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
// variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
Value *Cmp =
Builder.CreateICmpEQ(ResumeV, RdxDesc.getRecurrenceStartValue());
Value *Cmp = Builder.CreateICmpEQ(
ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]);
ResumeV =
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
}
Expand All @@ -10461,6 +10480,31 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
VPValue *StartVal = Plan.getOrAddLiveIn(ResumeV);
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}

// Re-use the trip count and steps expanded for the main loop, as
// skeleton creation needs it as a value that dominates both the scalar
// and vector epilogue loops
// TODO: This is a workaround needed for epilogue vectorization and it
// should be removed once induction resume value creation is done
// directly in VPlan.
for (auto &R : make_early_inc_range(*Plan.getEntry())) {
auto *VPI = dyn_cast<VPInstruction>(&R);
if (VPI) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What guarantee is there that VPI corresponds to the frozen start value? Do we need to check for VPInstructions with opcode Instruction::Freeze? I assume this is supposed to match up with the VPInstructions added by AddFreezeForFindLastIVReductions above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, there can only be freeze VPInstruction in the header, updated to check though

VPI->replaceAllUsesWith(Plan.getOrAddLiveIn(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand what you're doing here, but it looks a bit odd at first glance. You're essentially replacing one freeze in the epilogue entry block with another from the MainLoopIterationCountCheck block, right? Perhaps worth a comment explaining what's happening?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, I also moved the comment about re-using the trip count inside the loop, thanks

ToFrozen[VPI->getOperand(0)->getLiveInIRValue()]));
continue;
}

auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpandR)
continue;
auto *ExpandedVal =
Plan.getOrAddLiveIn(ExpandedSCEVs.find(ExpandR->getSCEV())->second);
ExpandR->replaceAllUsesWith(ExpandedVal);
if (Plan.getTripCount() == ExpandR)
Plan.resetTripCount(ExpandedVal);
ExpandR->eraseFromParent();
}
}

// Generate bypass values from the additional bypass block. Note that when the
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
ComputeFindLastIVResult,
ComputeReductionResult,
// Takes the VPValue to extract from as first operand and the lane or part
// to extract as second operand, counting from the end starting with 1 for
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {

switch (Opcode) {
case Instruction::ExtractElement:
case Instruction::Freeze:
return inferScalarType(R->getOperand(0));
case Instruction::Select: {
Type *ResTy = inferScalarType(R->getOperand(1));
Expand All @@ -66,6 +67,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ using BinaryVPInstruction_match =
BinaryRecipe_match<Op0_t, Op1_t, Opcode, /*Commutative*/ false,
VPInstruction>;

template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode,
bool Commutative, typename... RecipeTys>
using TernaryRecipe_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>,
Opcode, Commutative, RecipeTys...>;

template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
using TernaryVPInstruction_match =
TernaryRecipe_match<Op0_t, Op1_t, Op2_t, Opcode, /*Commutative*/ false,
VPInstruction>;

template <typename Op0_t, typename Op1_t, unsigned Opcode,
bool Commutative = false>
using AllBinaryRecipe_match =
Expand All @@ -234,6 +244,13 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1) {
return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
}

template <unsigned Opcode, typename Op0_t, typename Op1_t, typename Op2_t>
inline TernaryVPInstruction_match<Op0_t, Op1_t, Op2_t, Opcode>
m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
return TernaryVPInstruction_match<Op0_t, Op1_t, Op2_t, Opcode>(
{Op0, Op1, Op2});
}

template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::Not>
m_Not(const Op0_t &Op0) {
Expand Down
48 changes: 39 additions & 9 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
if (isSingleScalar() || isVectorToScalar())
return true;
switch (Opcode) {
case Instruction::Freeze:
case Instruction::ICmp:
case Instruction::PHI:
case Instruction::Select:
Expand Down Expand Up @@ -474,6 +475,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);
return Builder.CreateExtractElement(Vec, Idx, Name);
}
case Instruction::Freeze: {
if (State.hasVectorValue(this))
return State.get(this);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will we still generate a freeze instruction if we have a vector value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check was left over from earlier version, removed, thanks

Value *Op = State.get(getOperand(0), true);
return Builder.CreateFreeze(Op, Name);
}
case Instruction::ICmp: {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
Expand Down Expand Up @@ -614,6 +621,28 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
case VPInstruction::ComputeFindLastIVResult: {
// The recipe's operands are the reduction phi, followed by one operand for
// each part of the reduction.
unsigned UF = getNumOperands() - 2;
Value *ReducedPartRdx = State.get(getOperand(2));
for (unsigned Part = 1; Part < UF; ++Part) {
ReducedPartRdx = createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx,
State.get(getOperand(2 + Part)));
}

// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
// Get its reduction variable descriptor.
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();

assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK));
assert(!PhiR->isInLoop());
return createFindLastIVReduction(Builder, ReducedPartRdx,
State.get(getOperand(1), true), RdxDesc);
}
case VPInstruction::ComputeReductionResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
Expand All @@ -623,6 +652,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

RecurKind RK = RdxDesc.getRecurrenceKind();
assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
"should be handled by ComputeFindLastIVResult");

Type *PhiTy = OrigPhi->getType();
// The recipe's operands are the reduction phi, followed by one operand for
Expand Down Expand Up @@ -658,9 +689,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
ReducedPartRdx =
createMinMaxOp(Builder, RecurKind::SMax, ReducedPartRdx, RdxPart);
else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
}
Expand All @@ -669,8 +697,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
if ((State.VF.isVector() ||
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
!PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
Expand All @@ -681,9 +708,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
ReducedPartRdx =
createAnyOfReduction(Builder, ReducedPartRdx, RdxDesc, OrigPhi);
else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK))
ReducedPartRdx =
createFindLastIVReduction(Builder, ReducedPartRdx, RdxDesc);
else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);

Expand Down Expand Up @@ -829,6 +853,7 @@ bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
Expand Down Expand Up @@ -921,6 +946,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case Instruction::ICmp:
case Instruction::Select:
case Instruction::Or:
case Instruction::Freeze:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably also add this opcode to VPInstruction::opcodeMayReadOrWriteFromMemory and return false.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
case VPInstruction::ActiveLaneMask:
Expand All @@ -933,6 +959,8 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::ComputeFindLastIVResult:
return Op == getOperand(1);
};
llvm_unreachable("switch should return");
}
Expand Down Expand Up @@ -1011,6 +1039,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractFromEnd:
O << "extract-from-end";
break;
case VPInstruction::ComputeFindLastIVResult:
O << "compute-find-last-iv-result";
break;
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
Expand Down Expand Up @@ -1571,7 +1602,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {
}
case Instruction::Freeze: {
Value *Op = State.get(getOperand(0));

Value *Freeze = Builder.CreateFreeze(Op);
State.set(this, Freeze);
break;
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// the parts to compute the final reduction value.
VPValue *Op1;
if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(), m_VPValue(Op1)))) {
m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
R.addOperand(getValueForPart(Op1, Part));
Expand Down
Loading