Skip to content

Commit 33b3aac

Browse files
committed
[VPlan] Update induction resume values in VPlan.
Updated ILV.createInductionResumeValues to directly update the VPIRInstructions wrapping the original phis with the created resume values. This is the first step towards modeling them completely in VPlan. Subsequent patches will move creation of the resume values completely into VPlan. Builds on top of #109975, which is included in this PR.
1 parent 0742090 commit 33b3aac

File tree

68 files changed

+909
-870
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+909
-870
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 70 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ class InnerLoopVectorizer {
523523
/// and the resume values can come from an additional bypass block, the \p
524524
/// AdditionalBypass pair provides information about the bypass block and the
525525
/// end value on the edge from bypass to this loop.
526-
PHINode *createInductionResumeValue(
526+
void createInductionResumeValue(
527527
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
528528
ArrayRef<BasicBlock *> BypassBlocks,
529529
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
@@ -536,6 +536,11 @@ class InnerLoopVectorizer {
536536
/// count of the original loop for both main loop and epilogue vectorization.
537537
void setTripCount(Value *TC) { TripCount = TC; }
538538

539+
std::pair<BasicBlock *, Value *>
540+
getInductionBypassValue(PHINode *OrigPhi) const {
541+
return InductionBypassValues.find(OrigPhi)->second;
542+
}
543+
539544
protected:
540545
friend class LoopVectorizationPlanner;
541546

@@ -675,6 +680,9 @@ class InnerLoopVectorizer {
675680
/// for cleaning the checks, if vectorization turns out unprofitable.
676681
GeneratedRTChecks &RTChecks;
677682

683+
/// Mapping of induction phis to their bypass values and bypass blocks.
684+
DenseMap<PHINode *, std::pair<BasicBlock *, Value *>> InductionBypassValues;
685+
678686
VPlan &Plan;
679687
};
680688

@@ -2588,7 +2596,18 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
25882596
nullptr, Twine(Prefix) + "scalar.ph");
25892597
}
25902598

2591-
PHINode *InnerLoopVectorizer::createInductionResumeValue(
2599+
static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
2600+
VPValue *Op) {
2601+
for (VPRecipeBase &R : *VPBB) {
2602+
auto *IRI = cast<VPIRInstruction>(&R);
2603+
if (&IRI->getInstruction() == P) {
2604+
IRI->addOperand(Op);
2605+
break;
2606+
}
2607+
}
2608+
}
2609+
2610+
void InnerLoopVectorizer::createInductionResumeValue(
25922611
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
25932612
ArrayRef<BasicBlock *> BypassBlocks,
25942613
std::pair<BasicBlock *, Value *> AdditionalBypass) {
@@ -2623,27 +2642,28 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
26232642
}
26242643
}
26252644

2626-
// Create phi nodes to merge from the backedge-taken check block.
2627-
PHINode *BCResumeVal =
2628-
PHINode::Create(OrigPhi->getType(), 3, "bc.resume.val",
2629-
LoopScalarPreHeader->getFirstNonPHIIt());
2630-
// Copy original phi DL over to the new one.
2631-
BCResumeVal->setDebugLoc(OrigPhi->getDebugLoc());
2645+
VPBasicBlock *MiddleVPBB =
2646+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
26322647

2633-
// The new PHI merges the original incoming value, in case of a bypass,
2634-
// or the value at the end of the vectorized loop.
2635-
BCResumeVal->addIncoming(EndValue, LoopMiddleBlock);
2648+
VPBasicBlock *ScalarPHVPBB = nullptr;
2649+
if (MiddleVPBB->getNumSuccessors() == 2) {
2650+
// Order is strict: first is the exit block, second is the scalar preheader.
2651+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2652+
} else {
2653+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2654+
}
26362655

2637-
// Fix the scalar body counter (PHI node).
2638-
// The old induction's phi node in the scalar body needs the truncated
2639-
// value.
2640-
for (BasicBlock *BB : BypassBlocks)
2641-
BCResumeVal->addIncoming(II.getStartValue(), BB);
2656+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2657+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2658+
VPInstruction::ResumePhi,
2659+
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2660+
OrigPhi->getDebugLoc(), "bc.resume.val");
26422661

2643-
if (AdditionalBypass.first)
2644-
BCResumeVal->setIncomingValueForBlock(AdditionalBypass.first,
2645-
EndValueFromAdditionalBypass);
2646-
return BCResumeVal;
2662+
auto *ScalarLoopHeader =
2663+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2664+
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
2665+
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
2666+
EndValueFromAdditionalBypass};
26472667
}
26482668

26492669
/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
@@ -2676,10 +2696,8 @@ void InnerLoopVectorizer::createInductionResumeValues(
26762696
for (const auto &InductionEntry : Legal->getInductionVars()) {
26772697
PHINode *OrigPhi = InductionEntry.first;
26782698
const InductionDescriptor &II = InductionEntry.second;
2679-
PHINode *BCResumeVal = createInductionResumeValue(
2680-
OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
2681-
AdditionalBypass);
2682-
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
2699+
createInductionResumeValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2700+
LoopBypassBlocks, AdditionalBypass);
26832701
}
26842702
}
26852703

@@ -7803,6 +7821,25 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78037821
// the second pass for the scalar loop. The induction resume values for the
78047822
// inductions in the epilogue loop are created before executing the plan for
78057823
// the epilogue loop.
7824+
for (VPRecipeBase &R :
7825+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
7826+
// Create induction resume values for both widened pointer and
7827+
// integer/fp inductions and update the start value of the induction
7828+
// recipes to use the resume value.
7829+
PHINode *IndPhi = nullptr;
7830+
const InductionDescriptor *ID;
7831+
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
7832+
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
7833+
ID = &Ind->getInductionDescriptor();
7834+
} else if (auto *WidenInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
7835+
IndPhi = WidenInd->getPHINode();
7836+
ID = &WidenInd->getInductionDescriptor();
7837+
} else
7838+
continue;
7839+
7840+
createInductionResumeValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7841+
LoopBypassBlocks);
7842+
}
78067843

78077844
return {LoopVectorPreHeader, nullptr};
78087845
}
@@ -10293,23 +10330,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1029310330
RdxDesc.getRecurrenceStartValue());
1029410331
}
1029510332
} else {
10296-
// Create induction resume values for both widened pointer and
10297-
// integer/fp inductions and update the start value of the induction
10298-
// recipes to use the resume value.
10333+
// Retrieve the induction resume values for wide inductions from
10334+
// their original phi nodes in the scalar loop
1029910335
PHINode *IndPhi = nullptr;
10300-
const InductionDescriptor *ID;
1030110336
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
1030210337
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
10303-
ID = &Ind->getInductionDescriptor();
1030410338
} else {
1030510339
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
1030610340
IndPhi = WidenInd->getPHINode();
10307-
ID = &WidenInd->getInductionDescriptor();
1030810341
}
10309-
10310-
ResumeV = MainILV.createInductionResumeValue(
10311-
IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
10312-
{EPI.MainLoopIterationCountCheck});
10342+
ResumeV = IndPhi->getIncomingValueForBlock(L->getLoopPreheader());
1031310343
}
1031410344
assert(ResumeV && "Must have a resume value");
1031510345
VPValue *StartVal = BestEpiPlan.getOrAddLiveIn(ResumeV);
@@ -10321,7 +10351,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1032110351
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
1032210352
DT, true, &ExpandedSCEVs);
1032310353
++LoopsEpilogueVectorized;
10354+
BasicBlock *PH = L->getLoopPreheader();
1032410355

10356+
for (const auto &[IVPhi, _] : LVL.getInductionVars()) {
10357+
auto *Inc = cast<PHINode>(IVPhi->getIncomingValueForBlock(PH));
10358+
const auto &[BB, V] = EpilogILV.getInductionBypassValue(IVPhi);
10359+
Inc->setIncomingValueForBlock(BB, V);
10360+
}
1032510361
if (!MainILV.areSafetyChecksAdded())
1032610362
DisableRuntimeUnroll = true;
1032710363
} else {

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
629629
State.CFG
630630
.VPBB2IRBB[cast<VPBasicBlock>(getParent()->getSinglePredecessor())];
631631
NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred);
632-
for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) {
632+
for (auto *OtherPred :
633+
reverse(to_vector(predecessors(Builder.GetInsertBlock())))) {
633634
assert(OtherPred != VPlanPred &&
634635
"VPlan predecessors should not be connected yet");
635636
NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred);

llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
3535
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[WIDE_LOAD1]], i32 1
3636
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
3737
; CHECK: [[SCALAR_PH]]:
38-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
3938
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
39+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
4040
; CHECK-NEXT: br label %[[LOOP:.*]]
4141
; CHECK: [[LOOP]]:
4242
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,10 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
205205
; CHECK: vector.ph:
206206
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
207207
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
208+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
208209
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0
209210
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
210211
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
211-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START]], [[N_VEC]]
212212
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
213213
; CHECK: vector.body:
214214
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -433,7 +433,7 @@ define void @test_widen_extended_induction(ptr %dst) {
433433
; CHECK: vec.epilog.middle.block:
434434
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
435435
; CHECK: vec.epilog.scalar.ph:
436-
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
436+
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i8 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 16, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
437437
; CHECK-NEXT: br label [[LOOP:%.*]]
438438
; CHECK: loop:
439439
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
7373
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3
7474
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
7575
; CHECK: scalar.ph:
76-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
76+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
77+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
7878
; CHECK-NEXT: br label [[LOOP:%.*]]
7979
; CHECK: loop:
8080
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
4848
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
4949
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
5050
; CHECK: scalar.ph:
51-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
5251
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
52+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ]
5353
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
5454
; CHECK: for.cond.cleanup.loopexit:
5555
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
@@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
154154
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
155155
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
156156
; CHECK: scalar.ph:
157-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ]
158157
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ]
159158
; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ]
160159
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ]
160+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ]
161161
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
162162
; CHECK: for.cond.cleanup.loopexit:
163163
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
114114
; DEFAULT-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]
115115
; DEFAULT-NEXT: br i1 [[CMP_N7]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
116116
; DEFAULT: vec.epilog.scalar.ph:
117-
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
117+
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
118118
; DEFAULT-NEXT: br label [[LOOP:%.*]]
119119
; DEFAULT: loop:
120120
; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -522,31 +522,31 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
522522
; PRED: pred.store.continue:
523523
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
524524
; PRED-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
525-
; PRED: pred.store.if3:
525+
; PRED: pred.store.if2:
526526
; PRED-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP18]], i32 1
527527
; PRED-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP24]]
528528
; PRED-NEXT: [[TMP26:%.*]] = add i32 [[OFFSET_IDX]], 1
529529
; PRED-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4
530530
; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
531-
; PRED: pred.store.continue4:
531+
; PRED: pred.store.continue3:
532532
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
533533
; PRED-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
534-
; PRED: pred.store.if5:
534+
; PRED: pred.store.if4:
535535
; PRED-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP18]], i32 2
536536
; PRED-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP28]]
537537
; PRED-NEXT: [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], 2
538538
; PRED-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4
539539
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
540-
; PRED: pred.store.continue6:
540+
; PRED: pred.store.continue5:
541541
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
542542
; PRED-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
543-
; PRED: pred.store.if7:
543+
; PRED: pred.store.if6:
544544
; PRED-NEXT: [[TMP32:%.*]] = extractelement <4 x i64> [[TMP18]], i32 3
545545
; PRED-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP32]]
546546
; PRED-NEXT: [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], 3
547547
; PRED-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4
548548
; PRED-NEXT: br label [[PRED_STORE_CONTINUE8]]
549-
; PRED: pred.store.continue8:
549+
; PRED: pred.store.continue7:
550550
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
551551
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
552552
; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
@@ -719,31 +719,31 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
719719
; PRED: pred.store.continue:
720720
; PRED-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
721721
; PRED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
722-
; PRED: pred.store.if2:
722+
; PRED: pred.store.if1:
723723
; PRED-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP17]], i32 1
724724
; PRED-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP23]]
725725
; PRED-NEXT: [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], 1
726726
; PRED-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4
727727
; PRED-NEXT: br label [[PRED_STORE_CONTINUE3]]
728-
; PRED: pred.store.continue3:
728+
; PRED: pred.store.continue2:
729729
; PRED-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
730730
; PRED-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
731-
; PRED: pred.store.if4:
731+
; PRED: pred.store.if3:
732732
; PRED-NEXT: [[TMP27:%.*]] = extractelement <4 x i64> [[TMP17]], i32 2
733733
; PRED-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP27]]
734734
; PRED-NEXT: [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], 2
735735
; PRED-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4
736736
; PRED-NEXT: br label [[PRED_STORE_CONTINUE5]]
737-
; PRED: pred.store.continue5:
737+
; PRED: pred.store.continue4:
738738
; PRED-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
739739
; PRED-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
740-
; PRED: pred.store.if6:
740+
; PRED: pred.store.if5:
741741
; PRED-NEXT: [[TMP31:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
742742
; PRED-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP31]]
743743
; PRED-NEXT: [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], 3
744744
; PRED-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4
745745
; PRED-NEXT: br label [[PRED_STORE_CONTINUE7]]
746-
; PRED: pred.store.continue7:
746+
; PRED: pred.store.continue6:
747747
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
748748
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
749749
; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
@@ -884,12 +884,12 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
884884
; PRED: pred.store.continue:
885885
; PRED-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
886886
; PRED-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
887-
; PRED: pred.store.if5:
887+
; PRED: pred.store.if4:
888888
; PRED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
889889
; PRED-NEXT: [[TMP13:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP12]], i32 2
890890
; PRED-NEXT: store i32 0, ptr [[TMP13]], align 8
891891
; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
892-
; PRED: pred.store.continue6:
892+
; PRED: pred.store.continue5:
893893
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
894894
; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
895895
; PRED-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]

0 commit comments

Comments
 (0)