Skip to content

Commit d65cdf1

Browse files
committed
[VPlan] Compute induction end values in VPlan.
Use createDerivedIV to compute IV end values directly in VPlan, instead of creating them up-front. This allows updating IV users outside the loop as a follow-up. Depends on llvm#110004 and llvm#109975.
1 parent 572cff5 commit d65cdf1

33 files changed

+262
-189
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ class VPBuilder {
233233

234234
VPDerivedIVRecipe *createDerivedIV(InductionDescriptor::InductionKind Kind,
235235
FPMathOperator *FPBinOp, VPValue *Start,
236-
VPCanonicalIVPHIRecipe *CanonicalIV,
237-
VPValue *Step, const Twine &Name = "") {
236+
VPValue *CanonicalIV, VPValue *Step,
237+
const Twine &Name = "") {
238238
return tryInsertInstruction(
239239
new VPDerivedIVRecipe(Kind, FPBinOp, Start, CanonicalIV, Step, Name));
240240
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 152 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ class InnerLoopVectorizer {
523523
/// and the resume values can come from an additional bypass block, the \p
524524
/// AdditionalBypass pair provides information about the bypass block and the
525525
/// end value on the edge from bypass to this loop.
526-
void createInductionResumeValue(
526+
void createInductionBypassValue(
527527
PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
528528
ArrayRef<BasicBlock *> BypassBlocks,
529529
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
@@ -574,15 +574,11 @@ class InnerLoopVectorizer {
574574
/// vector loop preheader, middle block and scalar preheader.
575575
void createVectorLoopSkeleton(StringRef Prefix);
576576

577-
/// Create new phi nodes for the induction variables to resume iteration count
578-
/// in the scalar epilogue, from where the vectorized loop left off.
579-
/// In cases where the loop skeleton is more complicated (eg. epilogue
580-
/// vectorization) and the resume values can come from an additional bypass
581-
/// block, the \p AdditionalBypass pair provides information about the bypass
582-
/// block and the end value on the edge from bypass to this loop.
583-
void createInductionResumeValues(
577+
/// Create values for the induction variables to resume iteration count
578+
/// in the bypass block.
579+
void createInductionBypassValues(
584580
const SCEV2ValueTy &ExpandedSCEVs,
585-
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
581+
std::pair<BasicBlock *, Value *> AdditionalBypass);
586582

587583
/// Allow subclasses to override and print debug traces before/after vplan
588584
/// execution, when trace information is requested.
@@ -2602,30 +2598,19 @@ static void addOperandToPhiInVPIRBasicBlock(VPIRBasicBlock *VPBB, PHINode *P,
26022598
}
26032599
}
26042600

2605-
void InnerLoopVectorizer::createInductionResumeValue(
2601+
void InnerLoopVectorizer::createInductionBypassValue(
26062602
PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
26072603
ArrayRef<BasicBlock *> BypassBlocks,
26082604
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2609-
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
2610-
assert(VectorTripCount && "Expected valid arguments");
2611-
26122605
Instruction *OldInduction = Legal->getPrimaryInduction();
2613-
Value *EndValue = nullptr;
26142606
Value *EndValueFromAdditionalBypass = AdditionalBypass.second;
2615-
if (OrigPhi == OldInduction) {
2616-
// We know what the end value is.
2617-
EndValue = VectorTripCount;
2618-
} else {
2607+
if (OrigPhi != OldInduction) {
26192608
IRBuilder<> B(LoopVectorPreHeader->getTerminator());
26202609

26212610
// Fast-math-flags propagate from the original induction instruction.
26222611
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
26232612
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
26242613

2625-
EndValue = emitTransformedIndex(B, VectorTripCount, II.getStartValue(),
2626-
Step, II.getKind(), II.getInductionBinOp());
2627-
EndValue->setName("ind.end");
2628-
26292614
// Compute the end value for the additional bypass (if applicable).
26302615
if (AdditionalBypass.first) {
26312616
B.SetInsertPoint(AdditionalBypass.first,
@@ -2637,26 +2622,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
26372622
}
26382623
}
26392624

2640-
VPBasicBlock *MiddleVPBB =
2641-
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
2642-
2643-
VPBasicBlock *ScalarPHVPBB = nullptr;
2644-
if (MiddleVPBB->getNumSuccessors() == 2) {
2645-
// Order is strict: first is the exit block, second is the scalar preheader.
2646-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
2647-
} else {
2648-
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
2649-
}
2650-
2651-
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
2652-
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
2653-
VPInstruction::ResumePhi,
2654-
{Plan.getOrAddLiveIn(EndValue), Plan.getOrAddLiveIn(II.getStartValue())},
2655-
OrigPhi->getDebugLoc(), "bc.resume.val");
2656-
2657-
auto *ScalarLoopHeader =
2658-
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
2659-
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
26602625
InductionBypassValues[OrigPhi] = {AdditionalBypass.first,
26612626
EndValueFromAdditionalBypass};
26622627
}
@@ -2675,23 +2640,16 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
26752640
return I->second;
26762641
}
26772642

2678-
void InnerLoopVectorizer::createInductionResumeValues(
2643+
void InnerLoopVectorizer::createInductionBypassValues(
26792644
const SCEV2ValueTy &ExpandedSCEVs,
26802645
std::pair<BasicBlock *, Value *> AdditionalBypass) {
2681-
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
2682-
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
2683-
"Inconsistent information about additional bypass.");
2684-
// We are going to resume the execution of the scalar loop.
2685-
// Go over all of the induction variables that we found and fix the
2686-
// PHIs that are left in the scalar version of the loop.
2687-
// The starting values of PHI nodes depend on the counter of the last
2688-
// iteration in the vectorized loop.
2689-
// If we come from a bypass edge then we need to start from the original
2690-
// start value.
2646+
assert(AdditionalBypass.first && AdditionalBypass.second &&
2647+
"Must have bypass information");
2648+
26912649
for (const auto &InductionEntry : Legal->getInductionVars()) {
26922650
PHINode *OrigPhi = InductionEntry.first;
26932651
const InductionDescriptor &II = InductionEntry.second;
2694-
createInductionResumeValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
2652+
createInductionBypassValue(OrigPhi, II, getExpandedStep(II, ExpandedSCEVs),
26952653
LoopBypassBlocks, AdditionalBypass);
26962654
}
26972655
}
@@ -2754,8 +2712,8 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton(
27542712
// faster.
27552713
emitMemRuntimeChecks(LoopScalarPreHeader);
27562714

2757-
// Emit phis for the new starting index of the scalar loop.
2758-
createInductionResumeValues(ExpandedSCEVs);
2715+
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
2716+
assert(VectorTripCount && "Expected valid arguments");
27592717

27602718
return {LoopVectorPreHeader, nullptr};
27612719
}
@@ -7719,6 +7677,18 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77197677
ILV.getOrCreateVectorTripCount(nullptr),
77207678
CanonicalIVStartValue, State);
77217679

7680+
VPBasicBlock *MiddleVPBB =
7681+
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
7682+
7683+
VPBasicBlock *ScalarPHVPBB = nullptr;
7684+
if (MiddleVPBB->getNumSuccessors() == 2) {
7685+
// Order is strict: first is the exit block, second is the scalar
7686+
// preheader.
7687+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
7688+
} else {
7689+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
7690+
}
7691+
77227692
BestVPlan.execute(&State);
77237693

77247694
// 2.5 Collect reduction resume values.
@@ -7836,7 +7806,7 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
78367806
} else
78377807
continue;
78387808

7839-
createInductionResumeValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
7809+
createInductionBypassValue(IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
78407810
LoopBypassBlocks);
78417811
}
78427812

@@ -8006,20 +7976,22 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
80067976
// Generate a resume induction for the vector epilogue and put it in the
80077977
// vector epilogue preheader
80087978
Type *IdxTy = Legal->getWidestInductionType();
7979+
80097980
PHINode *EPResumeVal = PHINode::Create(IdxTy, 2, "vec.epilog.resume.val");
80107981
EPResumeVal->insertBefore(LoopVectorPreHeader->getFirstNonPHIIt());
80117982
EPResumeVal->addIncoming(EPI.VectorTripCount, VecEpilogueIterationCountCheck);
80127983
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
80137984
EPI.MainLoopIterationCountCheck);
80147985

8015-
// Generate induction resume values. These variables save the new starting
8016-
// indexes for the scalar loop. They are used to test if there are any tail
8017-
// iterations left once the vector loop has completed.
7986+
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
7987+
assert(VectorTripCount && "Expected valid arguments");
7988+
7989+
// Generate induction resume values for the bypass blocks.
80187990
// Note that when the vectorized epilogue is skipped due to iteration count
80197991
// check, then the resume value for the induction variable comes from
80207992
// the trip count of the main vector loop, hence passing the AdditionalBypass
80217993
// argument.
8022-
createInductionResumeValues(ExpandedSCEVs,
7994+
createInductionBypassValues(ExpandedSCEVs,
80237995
{VecEpilogueIterationCountCheck,
80247996
EPI.VectorTripCount} /* AdditionalBypass */);
80257997

@@ -8932,6 +8904,74 @@ addUsersInExitBlock(VPlan &Plan,
89328904
}
89338905
}
89348906

8907+
static void addResumeValuesForInductions(VPlan &Plan) {
8908+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
8909+
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
8910+
8911+
VPBuilder Builder(
8912+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()));
8913+
for (VPRecipeBase &R : Header->phis()) {
8914+
PHINode *OrigPhi;
8915+
const InductionDescriptor *ID;
8916+
VPValue *Start;
8917+
VPValue *Step;
8918+
Type *ScalarTy;
8919+
bool IsCanonical = false;
8920+
if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8921+
if (WideIV->getTruncInst())
8922+
continue;
8923+
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
8924+
ID = &WideIV->getInductionDescriptor();
8925+
Start = WideIV->getStartValue();
8926+
Step = WideIV->getStepValue();
8927+
ScalarTy = WideIV->getScalarType();
8928+
IsCanonical = WideIV->isCanonical();
8929+
} else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8930+
OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue());
8931+
ID = &WideIV->getInductionDescriptor();
8932+
Start = WideIV->getStartValue();
8933+
Step = WideIV->getOperand(1);
8934+
ScalarTy = Start->getLiveInIRValue()->getType();
8935+
} else {
8936+
continue;
8937+
}
8938+
8939+
VPValue *EndValue = &Plan.getVectorTripCount();
8940+
if (!IsCanonical) {
8941+
EndValue = Builder.createDerivedIV(
8942+
ID->getKind(),
8943+
dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp()), Start,
8944+
&Plan.getVectorTripCount(), Step);
8945+
}
8946+
8947+
if (ScalarTy != TypeInfo.inferScalarType(EndValue)) {
8948+
EndValue =
8949+
Builder.createScalarCast(Instruction::Trunc, EndValue, ScalarTy);
8950+
}
8951+
8952+
VPBasicBlock *MiddleVPBB =
8953+
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
8954+
8955+
VPBasicBlock *ScalarPHVPBB = nullptr;
8956+
if (MiddleVPBB->getNumSuccessors() == 2) {
8957+
// Order is strict: first is the exit block, second is the scalar
8958+
// preheader.
8959+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
8960+
} else {
8961+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
8962+
}
8963+
8964+
VPBuilder ScalarPHBuilder(ScalarPHVPBB);
8965+
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
8966+
VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc(),
8967+
"bc.resume.val");
8968+
8969+
auto *ScalarLoopHeader =
8970+
cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor());
8971+
addOperandToPhiInVPIRBasicBlock(ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8972+
}
8973+
}
8974+
89358975
/// Handle users in the exit block for first-order recurrences in the original
89368976
/// exit block. The penultimate value of recurrences is fed to their LCSSA phi
89378977
/// users in the original exit block using the VPIRInstruction wrapping to the
@@ -9205,6 +9245,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92059245
OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
92069246
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
92079247
addUsersInExitBlock(*Plan, ExitUsersToFix);
9248+
addResumeValuesForInductions(*Plan);
9249+
92089250
// ---------------------------------------------------------------------------
92099251
// Transform initial VPlan: Apply previously taken decisions, in order, to
92109252
// bring the VPlan to its final state.
@@ -9315,6 +9357,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
93159357
bool HasNUW = true;
93169358
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), HasNUW,
93179359
DebugLoc());
9360+
addResumeValuesForInductions(*Plan);
93189361
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
93199362
return Plan;
93209363
}
@@ -9599,7 +9642,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
95999642
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
96009643
Kind, cast_if_present<BinaryOperator>(FPBinOp));
96019644
DerivedIV->setName(Name);
9602-
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
9645+
/* assert((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&*/
9646+
/*"IV didn't need transforming?");*/
96039647

96049648
State.set(this, DerivedIV, VPLane(0));
96059649
}
@@ -10268,6 +10312,52 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1026810312
EPI, &LVL, &CM, BFI, PSI, Checks,
1026910313
*BestMainPlan);
1027010314

10315+
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
10316+
// Collect PHI nodes of wide inductions in the VPlan for the epilogue.
10317+
// Those will need their resume-values computed from the main vector
10318+
// loop. Others can be removed in the main VPlan.
10319+
SmallPtrSet<PHINode *, 2> WidenedPhis;
10320+
for (VPRecipeBase &R :
10321+
BestEpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
10322+
if (!isa<VPWidenIntOrFpInductionRecipe,
10323+
VPWidenPointerInductionRecipe>(&R))
10324+
continue;
10325+
if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10326+
WidenedPhis.insert(
10327+
cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode());
10328+
else
10329+
WidenedPhis.insert(
10330+
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
10331+
}
10332+
VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10333+
BestMainPlan->getVectorLoopRegion()->getSingleSuccessor());
10334+
10335+
VPBasicBlock *ScalarPHVPBB = nullptr;
10336+
if (MiddleVPBB->getNumSuccessors() == 2) {
10337+
// Order is strict: first is the exit block, second is the scalar
10338+
// preheader.
10339+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
10340+
} else {
10341+
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
10342+
}
10343+
10344+
for (VPRecipeBase &R :
10345+
*cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor())) {
10346+
auto *VPIRInst = cast<VPIRInstruction>(&R);
10347+
auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction());
10348+
if (!IRI)
10349+
break;
10350+
if (WidenedPhis.contains(IRI) ||
10351+
!LVL.getInductionVars().contains(IRI))
10352+
continue;
10353+
VPRecipeBase *ResumePhi =
10354+
VPIRInst->getOperand(0)->getDefiningRecipe();
10355+
VPIRInst->setOperand(0, BestMainPlan->getOrAddLiveIn(
10356+
Constant::getNullValue(IRI->getType())));
10357+
ResumePhi->eraseFromParent();
10358+
}
10359+
VPlanTransforms::removeDeadRecipes(*BestMainPlan);
10360+
1027110361
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1027210362
*BestMainPlan, MainILV, DT, false);
1027310363
++LoopsVectorized;
@@ -10276,7 +10366,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1027610366
// edges from the first pass.
1027710367
EPI.MainLoopVF = EPI.EpilogueVF;
1027810368
EPI.MainLoopUF = EPI.EpilogueUF;
10279-
VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF);
1028010369
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
1028110370
ORE, EPI, &LVL, &CM, BFI, PSI,
1028210371
Checks, BestEpiPlan);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
6464
case VPInstruction::FirstOrderRecurrenceSplice:
6565
case VPInstruction::LogicalAnd:
6666
case VPInstruction::PtrAdd:
67+
case VPInstruction::ResumePhi:
6768
return false;
6869
default:
6970
return true;

llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
1313
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 8, [[TMP4]]
1414
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
1515
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
16-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1716
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1817
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
18+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
1919
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 8)
2020
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
2121
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer
@@ -102,9 +102,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
102102
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP4]]
103103
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
104104
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
105-
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
106105
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
107106
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
107+
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[N_VEC]]
108108
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
109109
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
110110
; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i64> [[TMP7]], zeroinitializer

0 commit comments

Comments
 (0)