Skip to content

Commit 3b414ba

Browse files
committed
[VPlan] Compute induction end values in VPlan.
Use createDerivedIV to compute IV end values directly in VPlan, instead of creating them up-front. This allows updating IV users outside the loop as a follow-up. Depends on #110004 and #109975.
1 parent 5c7833b commit 3b414ba

33 files changed

+347
-247
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,7 @@ class VPBuilder {
233233

234234
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
235235
FPMathOperator *FPBinOp, VPValue *Start,
236-
VPCanonicalIVPHIRecipe *CanonicalIV,
237-
VPValue *Step) {
236+
VPValue *CanonicalIV, VPValue *Step) {
238237
return tryInsertInstruction(
239238
new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step));
240239
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 130 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2610,22 +2610,15 @@ void InnerLoopVectorizer::createInductionResumeValue(
26102610
assert(VectorTripCount && "Expected valid arguments");
26112611

26122612
Instruction *OldInduction = Legal->getPrimaryInduction();
2613-
Value *EndValue = nullptr;
26142613
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
26152614
if (OrigPhi == OldInduction) {
2616-
// We know what the end value is.
2617-
EndValue = VectorTripCount;
26182615
} else {
26192616
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
26202617

26212618
// Fast-math-flags propagate from the original induction instruction.
26222619
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
26232620
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
26242621

2625-
EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
2626-
Step, II.getKind(), II.getInductionBinOp());
2627-
EndValue->setName("ind.end");
2628-
26292622
// Compute the end value for the additional bypass (if applicable).
26302623
if (AdditionalBypass.first) {
26312624
B.SetInsertPoint(AdditionalBypass.first,
@@ -2637,26 +2630,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
26372630
}
26382631
}
26392632

2640-
VPBasicBlock *MiddleVPBB =
2641-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
2642-
2643-
VPBasicBlock *ScalarPHVPBB = nullptr;
2644-
if (MiddleVPBB->getNumSuccessors() == 2) {
2645-
// Order is strict: first is the exit block, second is the scalar preheader.
2646-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2647-
} else {
2648-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2649-
}
2650-
2651-
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2652-
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2653-
VPInstruction::ResumePhi,
2654-
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2655-
OrigPhi->getDebugLoc(), "bc.resume.val");
2656-
2657-
auto *ScalarLoopHeader =
2658-
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2659-
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
26602633
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
26612634
EndValueFromAdditionalBypass};
26622635
}
@@ -7704,10 +7677,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77047677
ILV.getOrCreateVectorTripCount(nullptr),
77057678
CanonicalIVStartValue, State);
77067679

7680+
VPBasicBlock *MiddleVPBB =
7681+
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7682+
7683+
VPBasicBlock *ScalarPHVPBB = nullptr;
7684+
if (MiddleVPBB->getNumSuccessors() == 2) {
7685+
// Order is strict: first is the exit block, second is the scalar
7686+
// preheader.
7687+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
7688+
} else {
7689+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
7690+
}
7691+
77077692
BestVPlan.execute(&State);
77087693

77097694
// 2.5 Collect reduction resume values.
7710-
auto *ExitVPBB =
7695+
VPBasicBlock *ExitVPBB =
77117696
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
77127697
for (VPRecipeBase &R : *ExitVPBB) {
77137698
createAndCollectMergePhiForReduction(
@@ -7992,6 +7977,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79927977
// Generate a resume induction for the vector epilogue and put it in the
79937978
// vector epilogue preheader
79947979
Type *IdxTy = Legal->getWidestInductionType();
7980+
79957981
PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
79967982
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
79977983
EPResumeVal->addIncoming(EPI.VectorTripCount, VecEpilogueIterationCountCheck);
@@ -8879,6 +8865,74 @@ addUsersInExitBlock(VPlan &Plan,
88798865
}
88808866
}
88818867

8868+
static void addResumeValuesForInductions(VPlan &Plan) {
8869+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8870+
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
8871+
8872+
VPBuilder Builder(
8873+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()));
8874+
for (VPRecipeBase &R : Header->phis()) {
8875+
PHINode *OrigPhi;
8876+
const InductionDescriptor *ID;
8877+
VPValue *Start;
8878+
VPValue *Step;
8879+
Type *ScalarTy;
8880+
bool IsCanonical = false;
8881+
if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8882+
if (WideIV->getTruncInst())
8883+
continue;
8884+
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
8885+
ID = &WideIV->getInductionDescriptor();
8886+
Start = WideIV->getStartValue();
8887+
Step = WideIV->getStepValue();
8888+
ScalarTy = WideIV->getScalarType();
8889+
IsCanonical = WideIV->isCanonical();
8890+
} else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8891+
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
8892+
ID = &WideIV->getInductionDescriptor();
8893+
Start = WideIV->getStartValue();
8894+
Step = WideIV->getOperand(1);
8895+
ScalarTy = Start->getLiveInIRValue()->getType();
8896+
} else {
8897+
continue;
8898+
}
8899+
8900+
VPValue *EndValue = &Plan.getVectorTripCount();
8901+
if (!IsCanonical) {
8902+
EndValue = Builder.createDerivedIV(
8903+
ID->getKind(),
8904+
dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp()), Start,
8905+
&Plan.getVectorTripCount(), Step);
8906+
}
8907+
8908+
if (ScalarTy != TypeInfo.inferScalarType(EndValue)) {
8909+
EndValue =
8910+
Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy);
8911+
}
8912+
8913+
VPBasicBlock *MiddleVPBB =
8914+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8915+
8916+
VPBasicBlock *ScalarPHVPBB = nullptr;
8917+
if (MiddleVPBB->getNumSuccessors() == 2) {
8918+
// Order is strict: first is the exit block, second is the scalar
8919+
// preheader.
8920+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8921+
} else {
8922+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8923+
}
8924+
8925+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8926+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
8927+
VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc(),
8928+
"bc.resume.val");
8929+
8930+
auto *ScalarLoopHeader =
8931+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
8932+
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8933+
}
8934+
}
8935+
88828936
/// Handle live-outs for first order reductions, both in the scalar preheader
88838937
/// and the original exit block:
88848938
/// 1. Feed a resume value for every FOR from the vector loop to the scalar
@@ -9174,6 +9228,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91749228
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
91759229
addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
91769230
addUsersInExitBlock(*Plan, ExitUsersToFix);
9231+
addResumeValuesForInductions(*Plan);
91779232

91789233
// ---------------------------------------------------------------------------
91799234
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9279,6 +9334,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92799334
bool HasNUW = true;
92809335
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
92819336
DebugLoc());
9337+
addResumeValuesForInductions(*Plan);
92829338
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
92839339
return Plan;
92849340
}
@@ -9562,7 +9618,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
95629618
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
95639619
Kind, cast_if_present<BinaryOperator>(FPBinOp));
95649620
DerivedIV->setName("offset.idx");
9565-
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9621+
assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9622+
"IV didn't need transforming?");
95669623

95679624
State.set(this, DerivedIV, VPLane(0));
95689625
}
@@ -10231,6 +10288,50 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023110288
EPI, &LVL, &CM, BFI, PSI, Checks,
1023210289
*BestMainPlan);
1023310290

10291+
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
10292+
// Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those will need their resume-values computed from the main vector loop. Others can be removed in the main VPlan.
10293+
SmallPtrSet<PHINode *, 2> WidenedPhis;
10294+
for (VPRecipeBase &R :
10295+
BestEpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
10296+
if (!isa<VPWidenIntOrFpInductionRecipe,
10297+
VPWidenPointerInductionRecipe>(&R))
10298+
continue;
10299+
if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10300+
WidenedPhis.insert(
10301+
cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode());
10302+
else
10303+
WidenedPhis.insert(
10304+
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
10305+
}
10306+
VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10307+
BestMainPlan->getVectorLoopRegion()->getSingleSuccessor());
10308+
10309+
VPBasicBlock *ScalarPHVPBB = nullptr;
10310+
if (MiddleVPBB->getNumSuccessors() == 2) {
10311+
// Order is strict: first is the exit block, second is the scalar
10312+
// preheader.
10313+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
10314+
} else {
10315+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
10316+
}
10317+
10318+
for (VPRecipeBase &R :
10319+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
10320+
auto *VPIRInst = cast<VPIRInstruction>(&R);
10321+
auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
10322+
if (!IRI)
10323+
break;
10324+
if (WidenedPhis.contains(IRI) ||
10325+
!LVL.getInductionVars().contains(IRI))
10326+
continue;
10327+
VPRecipeBase *ResumePhi =
10328+
VPIRInst->getOperand(0)->getDefiningRecipe();
10329+
VPIRInst->setOperand(0, BestMainPlan->getOrAddLiveIn(
10330+
Constant::getNullValue(IRI->getType())));
10331+
ResumePhi->eraseFromParent();
10332+
}
10333+
VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10334+
1023410335
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1023510336
*BestMainPlan, MainILV, DT, true);
1023610337
++LoopsVectorized;
@@ -10239,7 +10340,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023910340
// edges from the first pass.
1024010341
EPI.MainLoopVF = EPI.EpilogueVF;
1024110342
EPI.MainLoopUF = EPI.EpilogueUF;
10242-
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1024310343
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
1024410344
ORE, EPI, &LVL, &CM, BFI, PSI,
1024510345
Checks, BestEpiPlan);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6363
case VPInstruction::FirstOrderRecurrenceSplice:
6464
case VPInstruction::LogicalAnd:
6565
case VPInstruction::PtrAdd:
66+
case VPInstruction::ResumePhi:
6667
return false;
6768
default:
6869
return true;

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
16-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1716
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
18+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
2020
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
@@ -102,9 +102,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
102102
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
103103
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
104104
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
105-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
106105
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
107106
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
107+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
108108
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
109109
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
110110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -785,11 +785,11 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
785785
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 257, [[TMP2]]
786786
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
787787
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
788+
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
789+
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
788790
; PRED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], 8
789791
; PRED-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
790792
; PRED-NEXT: [[IND_END1:%.*]] = mul i64 [[N_VEC]], 2
791-
; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
792-
; PRED-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
793793
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
794794
; PRED-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2
795795
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -522,31 +522,31 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
522522
; PRED: pred.store.continue:
523523
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
524524
; PRED-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
525-
; PRED: pred.store.if2:
525+
; PRED: pred.store.if3:
526526
; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1
527527
; PRED-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP24]]
528528
; PRED-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 1
529529
; PRED-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4
530530
; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
531-
; PRED: pred.store.continue3:
531+
; PRED: pred.store.continue4:
532532
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
533533
; PRED-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
534-
; PRED: pred.store.if4:
534+
; PRED: pred.store.if5:
535535
; PRED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2
536536
; PRED-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP28]]
537537
; PRED-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 2
538538
; PRED-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4
539539
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
540-
; PRED: pred.store.continue5:
540+
; PRED: pred.store.continue6:
541541
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
542542
; PRED-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
543-
; PRED: pred.store.if6:
543+
; PRED: pred.store.if7:
544544
; PRED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3
545545
; PRED-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP32]]
546546
; PRED-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], 3
547547
; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4
548548
; PRED-NEXT: br label [[PRED_STORE_CONTINUE8]]
549-
; PRED: pred.store.continue7:
549+
; PRED: pred.store.continue8:
550550
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
551551
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
552552
; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
@@ -719,31 +719,31 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
719719
; PRED: pred.store.continue:
720720
; PRED-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
721721
; PRED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
722-
; PRED: pred.store.if1:
722+
; PRED: pred.store.if2:
723723
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
724724
; PRED-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP23]]
725725
; PRED-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 1
726726
; PRED-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4
727727
; PRED-NEXT: br label [[PRED_STORE_CONTINUE3]]
728-
; PRED: pred.store.continue2:
728+
; PRED: pred.store.continue3:
729729
; PRED-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
730730
; PRED-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
731-
; PRED: pred.store.if3:
731+
; PRED: pred.store.if4:
732732
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
733733
; PRED-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
734734
; PRED-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 2
735735
; PRED-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
736736
; PRED-NEXT: br label [[PRED_STORE_CONTINUE5]]
737-
; PRED: pred.store.continue4:
737+
; PRED: pred.store.continue5:
738738
; PRED-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
739739
; PRED-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
740-
; PRED: pred.store.if5:
740+
; PRED: pred.store.if6:
741741
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
742742
; PRED-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]]
743743
; PRED-NEXT: [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], 3
744744
; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4
745745
; PRED-NEXT: br label [[PRED_STORE_CONTINUE7]]
746-
; PRED: pred.store.continue6:
746+
; PRED: pred.store.continue7:
747747
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
748748
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
749749
; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
@@ -863,8 +863,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
863863
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
864864
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
865865
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
866-
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
867866
; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
867+
; PRED-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
868868
; PRED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
869869
; PRED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT3]], <2 x i64> poison, <2 x i32> zeroinitializer
870870
; PRED-NEXT: br label [[LOOP:%.*]]

0 commit comments

Comments
 (0)