Skip to content

Commit aae7ac6

Browse files
committed
[VPlan] Remove VPIteration and use VPLane directly instead (NFC)
After 8ec4067 (#95842), only the lane part of VPIteration is used. Simplify the code by replacing remaining uses of VPIteration with VPLane directly.
1 parent 556ec4a commit aae7ac6

File tree

4 files changed

+113
-134
lines changed

4 files changed

+113
-134
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -505,8 +505,7 @@ class InnerLoopVectorizer {
505505
/// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p
506506
/// Instr's operands.
507507
void scalarizeInstruction(const Instruction *Instr,
508-
VPReplicateRecipe *RepRecipe,
509-
const VPIteration &Instance,
508+
VPReplicateRecipe *RepRecipe, const VPLane &Lane,
510509
VPTransformState &State);
511510

512511
/// Fix the non-induction PHIs in \p Plan.
@@ -2322,14 +2321,14 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
23222321

23232322
void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23242323
VPReplicateRecipe *RepRecipe,
2325-
const VPIteration &Instance,
2324+
const VPLane &Lane,
23262325
VPTransformState &State) {
23272326
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
23282327

23292328
// llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for
23302329
// the first lane and part.
23312330
if (isa<NoAliasScopeDeclInst>(Instr))
2332-
if (!Instance.isFirstIteration())
2331+
if (!Lane.isFirstLane())
23332332
return;
23342333

23352334
// Does this instruction return a value ?
@@ -2354,18 +2353,18 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23542353
// Replace the operands of the cloned instructions with their scalar
23552354
// equivalents in the new loop.
23562355
for (const auto &I : enumerate(RepRecipe->operands())) {
2357-
auto InputInstance = Instance;
2356+
auto InputLane = Lane;
23582357
VPValue *Operand = I.value();
23592358
if (vputils::isUniformAfterVectorization(Operand))
2360-
InputInstance.Lane = VPLane::getFirstLane();
2361-
Cloned->setOperand(I.index(), State.get(Operand, InputInstance));
2359+
InputLane = VPLane::getFirstLane();
2360+
Cloned->setOperand(I.index(), State.get(Operand, InputLane));
23622361
}
23632362
State.addNewMetadata(Cloned, Instr);
23642363

23652364
// Place the cloned scalar in the new loop.
23662365
State.Builder.Insert(Cloned);
23672366

2368-
State.set(RepRecipe, Cloned, Instance);
2367+
State.set(RepRecipe, Cloned, Lane);
23692368

23702369
// If we just cloned a new assumption, add it to the assumption cache.
23712370
if (auto *II = dyn_cast<AssumeInst>(Cloned))
@@ -2784,7 +2783,7 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
27842783
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
27852784
assert(StepVPV && "step must have been expanded during VPlan execution");
27862785
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
2787-
: State.get(StepVPV, {0, 0});
2786+
: State.get(StepVPV, VPLane(0));
27882787
Value *Escape =
27892788
emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
27902789
II.getKind(), II.getInductionBinOp());
@@ -7435,8 +7434,7 @@ static void createAndCollectMergePhiForReduction(
74357434
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
74367435
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
74377436

7438-
Value *FinalValue =
7439-
State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
7437+
Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
74407438
auto *ResumePhi =
74417439
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
74427440
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -7525,7 +7523,7 @@ LoopVectorizationPlanner::executePlan(
75257523
BestVPlan.getPreheader()->execute(&State);
75267524
}
75277525
if (!ILV.getTripCount())
7528-
ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0}));
7526+
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
75297527
else
75307528
assert(IsEpilogueVectorization && "should only re-use the existing trip "
75317529
"count during epilogue vectorization");
@@ -9409,48 +9407,48 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
94099407
}
94109408

94119409
void VPDerivedIVRecipe::execute(VPTransformState &State) {
9412-
assert(!State.Instance && "VPDerivedIVRecipe being replicated.");
9410+
assert(!State.Lane && "VPDerivedIVRecipe being replicated.");
94139411

94149412
// Fast-math-flags propagate from the original induction instruction.
94159413
IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
94169414
if (FPBinOp)
94179415
State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags());
94189416

9419-
Value *Step = State.get(getStepValue(), VPIteration(0, 0));
9420-
Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0));
9417+
Value *Step = State.get(getStepValue(), VPLane(0));
9418+
Value *CanonicalIV = State.get(getOperand(1), VPLane(0));
94219419
Value *DerivedIV = emitTransformedIndex(
94229420
State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step,
94239421
Kind, cast_if_present<BinaryOperator>(FPBinOp));
94249422
DerivedIV->setName("offset.idx");
94259423
assert(DerivedIV != CanonicalIV && "IV didn't need transforming?");
94269424

9427-
State.set(this, DerivedIV, VPIteration(0, 0));
9425+
State.set(this, DerivedIV, VPLane(0));
94289426
}
94299427

94309428
void VPReplicateRecipe::execute(VPTransformState &State) {
94319429
Instruction *UI = getUnderlyingInstr();
9432-
if (State.Instance) { // Generate a single instance.
9430+
if (State.Lane) { // Generate a single instance.
94339431
assert((State.VF.isScalar() || !isUniform()) &&
94349432
"uniform recipe shouldn't be predicated");
94359433
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
9436-
State.ILV->scalarizeInstruction(UI, this, *State.Instance, State);
9434+
State.ILV->scalarizeInstruction(UI, this, *State.Lane, State);
94379435
// Insert scalar instance packing it into a vector.
94389436
if (State.VF.isVector() && shouldPack()) {
94399437
// If we're constructing lane 0, initialize to start from poison.
9440-
if (State.Instance->Lane.isFirstLane()) {
9438+
if (State.Lane->isFirstLane()) {
94419439
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
94429440
Value *Poison = PoisonValue::get(
94439441
VectorType::get(UI->getType(), State.VF));
94449442
State.set(this, Poison);
94459443
}
9446-
State.packScalarIntoVectorValue(this, *State.Instance);
9444+
State.packScalarIntoVectorValue(this, *State.Lane);
94479445
}
94489446
return;
94499447
}
94509448

94519449
if (IsUniform) {
94529450
// Uniform within VL means we need to generate lane 0.
9453-
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
9451+
State.ILV->scalarizeInstruction(UI, this, VPLane(0), State);
94549452
return;
94559453
}
94569454

@@ -9459,15 +9457,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
94599457
if (isa<StoreInst>(UI) &&
94609458
vputils::isUniformAfterVectorization(getOperand(1))) {
94619459
auto Lane = VPLane::getLastLaneForVF(State.VF);
9462-
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
9460+
State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
94639461
return;
94649462
}
94659463

94669464
// Generate scalar instances for all VF lanes.
94679465
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
94689466
const unsigned EndLane = State.VF.getKnownMinValue();
94699467
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
9470-
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
9468+
State.ILV->scalarizeInstruction(UI, this, VPLane(Lane), State);
94719469
}
94729470

94739471
// Determine how to lower the scalar epilogue, which depends on 1) optimising

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -228,28 +228,27 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
228228
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229229
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
230230

231-
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
231+
Value *VPTransformState::get(VPValue *Def, const VPLane &Lane) {
232232
if (Def->isLiveIn())
233233
return Def->getLiveInIRValue();
234234

235-
if (hasScalarValue(Def, Instance)) {
236-
return Data.VPV2Scalars[Def][Instance.Lane.mapToCacheIndex(VF)];
237-
}
238-
if (!Instance.Lane.isFirstLane() &&
239-
vputils::isUniformAfterVectorization(Def) &&
240-
hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
235+
if (hasScalarValue(Def, Lane))
236+
return Data.VPV2Scalars[Def][Lane.mapToCacheIndex(VF)];
237+
238+
if (!Lane.isFirstLane() && vputils::isUniformAfterVectorization(Def) &&
239+
hasScalarValue(Def, VPLane::getFirstLane())) {
241240
return Data.VPV2Scalars[Def][0];
242241
}
243242

244243
assert(hasVectorValue(Def));
245244
auto *VecPart = Data.VPV2Vector[Def];
246245
if (!VecPart->getType()->isVectorTy()) {
247-
assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
246+
assert(Lane.isFirstLane() && "cannot get lane > 0 for scalar");
248247
return VecPart;
249248
}
250249
// TODO: Cache created scalar values.
251-
Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
252-
auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
250+
Value *LaneV = Lane.getAsRuntimeExpr(Builder, VF);
251+
auto *Extract = Builder.CreateExtractElement(VecPart, LaneV);
253252
// set(Def, Extract, Lane);
254253
return Extract;
255254
}
@@ -258,11 +257,11 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
258257
if (NeedsScalar) {
259258
assert((VF.isScalar() || Def->isLiveIn() || hasVectorValue(Def) ||
260259
!vputils::onlyFirstLaneUsed(Def) ||
261-
(hasScalarValue(Def, VPIteration(0, 0)) &&
260+
(hasScalarValue(Def, VPLane(0)) &&
262261
Data.VPV2Scalars[Def].size() == 1)) &&
263262
"Trying to access a single scalar per part but has multiple scalars "
264263
"per part.");
265-
return get(Def, VPIteration(0, 0));
264+
return get(Def, VPLane(0));
266265
}
267266

268267
// If Values have been set for this Def return the one relevant for \p Part.
@@ -289,15 +288,15 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
289288
return Shuf;
290289
};
291290

292-
if (!hasScalarValue(Def, {0, 0})) {
291+
if (!hasScalarValue(Def, {0})) {
293292
assert(Def->isLiveIn() && "expected a live-in");
294293
Value *IRV = Def->getLiveInIRValue();
295294
Value *B = GetBroadcastInstrs(IRV);
296295
set(Def, B);
297296
return B;
298297
}
299298

300-
Value *ScalarValue = get(Def, {0, 0});
299+
Value *ScalarValue = get(Def, VPLane(0));
301300
// If we aren't vectorizing, we can just copy the scalar map values over
302301
// to the vector map.
303302
if (VF.isScalar()) {
@@ -307,9 +306,9 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
307306

308307
bool IsUniform = vputils::isUniformAfterVectorization(Def);
309308

310-
unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
309+
VPLane LastLane(IsUniform ? 0 : VF.getKnownMinValue() - 1);
311310
// Check if there is a scalar value for the selected lane.
312-
if (!hasScalarValue(Def, {0, LastLane})) {
311+
if (!hasScalarValue(Def, LastLane)) {
313312
// At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
314313
// VPExpandSCEVRecipes can also be uniform.
315314
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
@@ -320,7 +319,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
320319
LastLane = 0;
321320
}
322321

323-
auto *LastInst = cast<Instruction>(get(Def, {0, LastLane}));
322+
auto *LastInst = cast<Instruction>(get(Def, LastLane));
324323
// Set the insert point after the last scalarized instruction or after the
325324
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence
326325
// will directly follow the scalar definitions.
@@ -347,7 +346,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
347346
Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
348347
set(Def, Undef);
349348
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
350-
packScalarIntoVectorValue(Def, {0, Lane});
349+
packScalarIntoVectorValue(Def, Lane);
351350
VectorValue = get(Def);
352351
}
353352
Builder.restoreIP(OldIP);
@@ -401,11 +400,11 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
401400
}
402401

403402
void VPTransformState::packScalarIntoVectorValue(VPValue *Def,
404-
const VPIteration &Instance) {
405-
Value *ScalarInst = get(Def, Instance);
403+
const VPLane &Lane) {
404+
Value *ScalarInst = get(Def, Lane);
406405
Value *VectorValue = get(Def);
407-
VectorValue = Builder.CreateInsertElement(
408-
VectorValue, ScalarInst, Instance.Lane.getAsRuntimeExpr(Builder, VF));
406+
VectorValue = Builder.CreateInsertElement(VectorValue, ScalarInst,
407+
Lane.getAsRuntimeExpr(Builder, VF));
409408
set(Def, VectorValue);
410409
}
411410

@@ -483,7 +482,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
483482
}
484483

485484
void VPBasicBlock::execute(VPTransformState *State) {
486-
bool Replica = State->Instance && !State->Instance->isFirstIteration();
485+
bool Replica = bool(State->Lane);
487486
VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
488487
VPBlockBase *SingleHPred = nullptr;
489488
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
@@ -765,14 +764,14 @@ void VPRegionBlock::execute(VPTransformState *State) {
765764
return;
766765
}
767766

768-
assert(!State->Instance && "Replicating a Region with non-null instance.");
767+
assert(!State->Lane && "Replicating a Region with non-null instance.");
769768

770769
// Enter replicating mode.
771-
State->Instance = VPIteration(0, 0);
772770
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
771+
State->Lane = VPLane(0);
773772
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
774773
++Lane) {
775-
State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
774+
State->Lane = VPLane(Lane, VPLane::Kind::First);
776775
// Visit the VPBlocks connected to \p this, starting from it.
777776
for (VPBlockBase *Block : RPOT) {
778777
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
@@ -781,7 +780,7 @@ void VPRegionBlock::execute(VPTransformState *State) {
781780
}
782781

783782
// Exit replicating mode.
784-
State->Instance.reset();
783+
State->Lane.reset();
785784
}
786785

787786
InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) {

0 commit comments

Comments
 (0)