Skip to content

Commit 0e70289

Browse files
committed
[VPlan] Create canonical IV resume value for epilogue in VPlan. (NFCI)
Update the code that creates induction resume PHIs to also create a resume phi for the canonical induction during epilogue vectorization. This unifies the code for handling induction resume values and removes the need to manually create a resume PHI and return it during epilogue creation. Overall, it helps move the code for updating the canonical induction resume value to the place where all other header phi resume values are updated. This is NFC, modulo the order of the created phis.
1 parent a13ec9c commit 0e70289

13 files changed

+105
-97
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 83 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -487,12 +487,11 @@ class InnerLoopVectorizer {
487487
/// on, while the old loop will be used as the scalar remainder. Control flow
488488
/// is generated around the vectorized (and scalar epilogue) loops consisting
489489
/// of various checks and bypasses. Return the pre-header block of the new
490-
/// loop and the start value for the canonical induction, if it is != 0. The
491-
/// latter is the case when vectorizing the epilogue loop. In the case of
492-
/// epilogue vectorization, this function is overriden to handle the more
493-
/// complex control flow around the loops. \p ExpandedSCEVs is used to
494-
/// look up SCEV expansions for expressions needed during skeleton creation.
495-
virtual std::pair<BasicBlock *, Value *>
490+
/// loop. In the case of epilogue vectorization, this function is overridden to
491+
/// handle the more complex control flow around the loops. \p ExpandedSCEVs is
492+
/// used to look up SCEV expansions for expressions needed during skeleton
493+
/// creation.
494+
virtual BasicBlock *
496495
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);
497496

498497
/// Fix the vectorized code, taking care of header phi's, and more.
@@ -747,15 +746,15 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
747746

748747
// Override this function to handle the more complex control flow around the
749748
// three loops.
750-
std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton(
751-
const SCEV2ValueTy &ExpandedSCEVs) final {
749+
BasicBlock *
750+
createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final {
752751
return createEpilogueVectorizedLoopSkeleton(ExpandedSCEVs);
753752
}
754753

755754
/// The interface for creating a vectorized skeleton using one of two
756755
/// different strategies, each corresponding to one execution of the vplan
757756
/// as described above.
758-
virtual std::pair<BasicBlock *, Value *>
757+
virtual BasicBlock *
759758
createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) = 0;
760759

761760
/// Holds and updates state information required to vectorize the main loop
@@ -784,7 +783,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
784783
EPI, LVL, CM, BFI, PSI, Check, Plan) {}
785784
/// Implements the interface for creating a vectorized skeleton using the
786785
/// *main loop* strategy (ie the first pass of vplan execution).
787-
std::pair<BasicBlock *, Value *>
786+
BasicBlock *
788787
createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
789788

790789
protected:
@@ -819,7 +818,7 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
819818
}
820819
/// Implements the interface for creating a vectorized skeleton using the
821820
/// *epilogue loop* strategy (ie the second pass of vplan execution).
822-
std::pair<BasicBlock *, Value *>
821+
BasicBlock *
823822
createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
824823

825824
protected:
@@ -2716,6 +2715,7 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
27162715
// Otherwise we provide the trip count from the main vector loop.
27172716
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader();
27182717
VPBuilder ScalarPHBuilder(ScalarPHVPBB, ScalarPHVPBB->begin());
2718+
bool HasCanonical = false;
27192719
for (VPRecipeBase &R : *Plan.getScalarHeader()) {
27202720
auto *PhiR = cast<VPIRInstruction>(&R);
27212721
auto *Phi = dyn_cast<PHINode>(&PhiR->getInstruction());
@@ -2728,11 +2728,25 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
27282728
createInductionResumeVPValue(PhiR, II, getExpandedStep(II, ExpandedSCEVs),
27292729
LoopBypassBlocks, ScalarPHBuilder,
27302730
MainVectorTripCount);
2731+
auto *ConstStart = dyn_cast<ConstantInt>(II.getStartValue());
2732+
auto *ConstStep = II.getConstIntStepValue();
2733+
if (Phi->getType() == VectorTripCount->getType() && ConstStart &&
2734+
ConstStart->isZero() && ConstStep && ConstStep->isOne())
2735+
HasCanonical = true;
27312736
}
2737+
2738+
if (!IVSubset || HasCanonical)
2739+
return;
2740+
// When vectorizing the epilogue, create a resume phi for the canonical IV if
2741+
// no suitable resume phi was already created.
2742+
ScalarPHBuilder.createNaryOp(
2743+
VPInstruction::ResumePhi,
2744+
{Plan.getOrAddLiveIn(VectorTripCount),
2745+
Plan.getOrAddLiveIn(ConstantInt::get(VectorTripCount->getType(), 0))},
2746+
{}, "vec.epilog.resume.val");
27322747
}
27332748

2734-
std::pair<BasicBlock *, Value *>
2735-
InnerLoopVectorizer::createVectorizedLoopSkeleton(
2749+
BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
27362750
const SCEV2ValueTy &ExpandedSCEVs) {
27372751
/*
27382752
In this function we generate a new loop. The new loop will contain
@@ -2792,7 +2806,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27922806
// Emit phis for the new starting index of the scalar loop.
27932807
createInductionResumeVPValues(ExpandedSCEVs);
27942808

2795-
return {LoopVectorPreHeader, nullptr};
2809+
return LoopVectorPreHeader;
27962810
}
27972811

27982812
// Fix up external users of the induction variable. At this point, we are
@@ -7740,10 +7754,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77407754

77417755
// 1. Set up the skeleton for vectorization, including vector pre-header and
77427756
// middle block. The vector loop is created during VPlan execution.
7743-
Value *CanonicalIVStartValue;
7744-
std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
7745-
ILV.createVectorizedLoopSkeleton(ExpandedSCEVs ? *ExpandedSCEVs
7746-
: State.ExpandedSCEVs);
7757+
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton(
7758+
ExpandedSCEVs ? *ExpandedSCEVs : State.ExpandedSCEVs);
77477759
if (VectorizingEpilogue)
77487760
VPlanTransforms::removeDeadRecipes(BestVPlan);
77497761

@@ -7781,8 +7793,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77817793

77827794
// 2. Copy and widen instructions from the old loop into the new loop.
77837795
BestVPlan.prepareToExecute(ILV.getTripCount(),
7784-
ILV.getOrCreateVectorTripCount(nullptr),
7785-
CanonicalIVStartValue, State);
7796+
ILV.getOrCreateVectorTripCount(nullptr), State);
77867797
VPlanTransforms::convertToConcreteRecipes(BestVPlan);
77877798

77887799
BestVPlan.execute(&State);
@@ -7859,8 +7870,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78597870

78607871
/// This function is partially responsible for generating the control flow
78617872
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7862-
std::pair<BasicBlock *, Value *>
7863-
EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
7873+
BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78647874
const SCEV2ValueTy &ExpandedSCEVs) {
78657875
createVectorLoopSkeleton("");
78667876

@@ -7904,7 +7914,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
79047914
}
79057915
createInductionResumeVPValues(ExpandedSCEVs, nullptr, &WideIVs);
79067916

7907-
return {LoopVectorPreHeader, nullptr};
7917+
return LoopVectorPreHeader;
79087918
}
79097919

79107920
void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
@@ -7984,7 +7994,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
79847994

79857995
/// This function is partially responsible for generating the control flow
79867996
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
7987-
std::pair<BasicBlock *, Value *>
7997+
BasicBlock *
79887998
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79897999
const SCEV2ValueTy &ExpandedSCEVs) {
79908000
createVectorLoopSkeleton("vec.epilog.");
@@ -8068,30 +8078,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80688078
Phi->removeIncomingValue(EPI.MemSafetyCheck);
80698079
}
80708080

8071-
// Generate a resume phi for the canonical induction of the vector epilogue
8072-
// and put it in the vector epilogue preheader, unless such a phi already
8073-
// exists there - and can be reused.
8074-
PHINode *EPResumeVal = nullptr;
8075-
Type *IdxTy = Legal->getWidestInductionType();
8076-
Value *TC = EPI.VectorTripCount;
8077-
Constant *Init = ConstantInt::get(IdxTy, 0);
8078-
8079-
for (PHINode &P : LoopVectorPreHeader->phis()) {
8080-
if (P.getType() == IdxTy &&
8081-
P.getIncomingValueForBlock(VecEpilogueIterationCountCheck) == TC &&
8082-
P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck) == Init) {
8083-
EPResumeVal = &P;
8084-
EPResumeVal->setName("vec.epilog.resume.val");
8085-
break;
8086-
}
8087-
}
8088-
if (!EPResumeVal) {
8089-
EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
8090-
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
8091-
EPResumeVal->addIncoming(TC, VecEpilogueIterationCountCheck);
8092-
EPResumeVal->addIncoming(Init, EPI.MainLoopIterationCountCheck);
8093-
}
8094-
80958081
// Generate induction resume values. These variables save the new starting
80968082
// indexes for the scalar loop. They are used to test if there are any tail
80978083
// iterations left once the vector loop has completed.
@@ -8100,7 +8086,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
81008086
// the trip count of the main vector loop, passed as the second argument.
81018087
createInductionResumeVPValues(ExpandedSCEVs, EPI.VectorTripCount);
81028088

8103-
return {LoopVectorPreHeader, EPResumeVal};
8089+
return LoopVectorPreHeader;
81048090
}
81058091

81068092
BasicBlock *
@@ -9993,7 +9979,8 @@ LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
99939979
/// SCEVs from \p ExpandedSCEVs and set resume values for header recipes.
99949980
static void
99959981
preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
9996-
const SCEV2ValueTy &ExpandedSCEVs) {
9982+
const SCEV2ValueTy &ExpandedSCEVs,
9983+
const EpilogueLoopVectorizationInfo &EPI) {
99979984
VPRegionBlock *VectorLoop = Plan.getVectorLoopRegion();
99989985
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
99999986
Header->setName("vec.epilog.vector.body");
@@ -10016,12 +10003,53 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1001610003
ExpandR->eraseFromParent();
1001710004
}
1001810005

10019-
// Ensure that the start values for any VPWidenIntOrFpInductionRecipe,
10020-
// VPWidenPointerInductionRecipe and VPReductionPHIRecipes are updated
10021-
// before vectorizing the epilogue loop.
10006+
// Ensure that the start values for all header phi recipes are updated before
10007+
// vectorizing the epilogue loop.
1002210008
for (VPRecipeBase &R : Header->phis()) {
10023-
if (isa<VPCanonicalIVPHIRecipe>(&R))
10009+
if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
10010+
// When vectorizing the epilogue loop, the canonical induction start
10011+
// value needs to be changed from zero to the value after the main
10012+
// vector loop. Find the resume value created during execution of the main
10013+
// VPlan.
10014+
// FIXME: Improve modeling for canonical IV start values in the epilogue
10015+
// loop.
10016+
BasicBlock *MainMiddle = find_singleton<BasicBlock>(
10017+
predecessors(L->getLoopPreheader()),
10018+
[&EPI](BasicBlock *BB, bool) -> BasicBlock * {
10019+
if (BB != EPI.MainLoopIterationCountCheck &&
10020+
BB != EPI.EpilogueIterationCountCheck &&
10021+
BB != EPI.SCEVSafetyCheck && BB != EPI.MemSafetyCheck)
10022+
return BB;
10023+
return nullptr;
10024+
});
10025+
using namespace llvm::PatternMatch;
10026+
Type *IdxTy = IV->getScalarType();
10027+
PHINode *EPResumeVal = find_singleton<PHINode>(
10028+
L->getLoopPreheader()->phis(),
10029+
[&EPI, IdxTy, MainMiddle](PHINode &P, bool) -> PHINode * {
10030+
if (P.getType() == IdxTy &&
10031+
P.getIncomingValueForBlock(MainMiddle) == EPI.VectorTripCount &&
10032+
match(
10033+
P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck),
10034+
m_SpecificInt(0)))
10035+
return &P;
10036+
return nullptr;
10037+
});
10038+
assert(EPResumeVal && "must have a resume value for the canonical IV");
10039+
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
10040+
assert(all_of(IV->users(),
10041+
[](const VPUser *U) {
10042+
return isa<VPScalarIVStepsRecipe>(U) ||
10043+
isa<VPScalarCastRecipe>(U) ||
10044+
isa<VPDerivedIVRecipe>(U) ||
10045+
cast<VPInstruction>(U)->getOpcode() ==
10046+
Instruction::Add;
10047+
}) &&
10048+
"the canonical IV should only be used by its increment or "
10049+
"ScalarIVSteps when resetting the start value");
10050+
IV->setOperand(0, VPV);
1002410051
continue;
10052+
}
1002510053

1002610054
Value *ResumeV = nullptr;
1002710055
// TODO: Move setting of resume values to prepareToExecute.
@@ -10425,7 +10453,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1042510453
ORE, EPI, &LVL, &CM, BFI, PSI,
1042610454
Checks, BestEpiPlan);
1042710455
EpilogILV.setTripCount(MainILV.getTripCount());
10428-
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs);
10456+
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
1042910457

1043010458
assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
1043110459
"DT not preserved correctly");

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,6 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
927927
}
928928

929929
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
930-
Value *CanonicalIVStartValue,
931930
VPTransformState &State) {
932931
Type *TCTy = TripCountV->getType();
933932
// Check if the backedge taken count is needed, and if so build it.
@@ -953,25 +952,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
953952
} else {
954953
VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
955954
}
956-
957-
// When vectorizing the epilogue loop, the canonical induction start value
958-
// needs to be changed from zero to the value after the main vector loop.
959-
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
960-
if (CanonicalIVStartValue) {
961-
VPValue *VPV = getOrAddLiveIn(CanonicalIVStartValue);
962-
auto *IV = getCanonicalIV();
963-
assert(all_of(IV->users(),
964-
[](const VPUser *U) {
965-
return isa<VPScalarIVStepsRecipe>(U) ||
966-
isa<VPScalarCastRecipe>(U) ||
967-
isa<VPDerivedIVRecipe>(U) ||
968-
cast<VPInstruction>(U)->getOpcode() ==
969-
Instruction::Add;
970-
}) &&
971-
"the canonical IV should only be used by its increment or "
972-
"ScalarIVSteps when resetting the start value");
973-
IV->setOperand(0, VPV);
974-
}
975955
}
976956

977957
/// Replace \p VPBB with a VPIRBasicBlock wrapping \p IRBB. All recipes from \p

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3856,7 +3856,7 @@ class VPlan {
38563856

38573857
/// Prepare the plan for execution, setting up the required live-in values.
38583858
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3859-
Value *CanonicalIVStartValue, VPTransformState &State);
3859+
VPTransformState &State);
38603860

38613861
/// Generate the IR code for this VPlan.
38623862
void execute(VPTransformState *State);

llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) {
5757
; INTERLEAVE-4-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
5858
; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
5959
; INTERLEAVE-4: vec.epilog.ph:
60-
; INTERLEAVE-4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
6160
; INTERLEAVE-4-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
61+
; INTERLEAVE-4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
6262
; INTERLEAVE-4-NEXT: [[N_MOD_VF10:%.*]] = urem i64 [[N]], 4
6363
; INTERLEAVE-4-NEXT: [[N_VEC11:%.*]] = sub i64 [[N]], [[N_MOD_VF10]]
6464
; INTERLEAVE-4-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ define i64 @int_reduction_and(ptr noalias nocapture %a, i64 %N) {
5050
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
5151
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
5252
; CHECK: vec.epilog.ph:
53-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5453
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
54+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5555
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
5656
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
5757
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ define i64 @int_reduction_add(ptr %a, i64 %N) {
4949
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
5050
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
5151
; CHECK: vec.epilog.ph:
52-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5352
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
53+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP21]], [[VEC_EPILOG_ITER_CHECK]] ], [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5454
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
5555
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
5656
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ define float @fadd_strict(ptr noalias nocapture readonly %a, i64 %n) {
4646
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
4747
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
4848
; CHECK: vec.epilog.ph:
49-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5049
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
50+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
5151
; CHECK-NEXT: [[N_MOD_VF3:%.*]] = urem i64 [[N]], 2
5252
; CHECK-NEXT: [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]]
5353
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,8 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
226226
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
227227
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
228228
; CHECK: vec.epilog.ph:
229-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP124]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
230229
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
230+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP124]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
231231
; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[TMP2]], 4
232232
; CHECK-NEXT: [[N_VEC8:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF7]]
233233
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC8]], 32

0 commit comments

Comments
 (0)