Skip to content

Commit 06c3a7d

Browse files
committed
[VPlan] Remove unneeded State.UF after 8ec4067 (NFC).
State.UF is not needed any longer after 8ec4067 (#95842). Clean it up, simplifying ::execute of existing recipes.
1 parent 76cffc2 commit 06c3a7d

File tree

4 files changed

+510
-706
lines changed

4 files changed

+510
-706
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7440,7 +7440,7 @@ static void createAndCollectMergePhiForReduction(
74407440
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
74417441

74427442
Value *FinalValue =
7443-
State.get(RedResult, VPIteration(State.UF - 1, VPLane::getFirstLane()));
7443+
State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
74447444
auto *ResumePhi =
74457445
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
74467446
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
@@ -9453,24 +9453,8 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
94539453
}
94549454

94559455
if (IsUniform) {
9456-
// If the recipe is uniform across all parts (instead of just per VF), only
9457-
// generate a single instance.
9458-
if ((isa<LoadInst>(UI) || isa<StoreInst>(UI)) &&
9459-
all_of(operands(),
9460-
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) {
9461-
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
9462-
if (user_begin() != user_end()) {
9463-
for (unsigned Part = 1; Part < State.UF; ++Part)
9464-
State.set(this, State.get(this, VPIteration(0, 0)),
9465-
VPIteration(Part, 0));
9466-
}
9467-
return;
9468-
}
9469-
9470-
// Uniform within VL means we need to generate lane 0 only for each
9471-
// unrolled copy.
9472-
for (unsigned Part = 0; Part < State.UF; ++Part)
9473-
State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0), State);
9456+
// Uniform within VL means we need to generate lane 0.
9457+
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
94749458
return;
94759459
}
94769460

@@ -9479,17 +9463,15 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
94799463
if (isa<StoreInst>(UI) &&
94809464
vputils::isUniformAfterVectorization(getOperand(1))) {
94819465
auto Lane = VPLane::getLastLaneForVF(State.VF);
9482-
State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
9483-
State);
9466+
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
94849467
return;
94859468
}
94869469

94879470
// Generate scalar instances for all VF lanes.
94889471
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
94899472
const unsigned EndLane = State.VF.getKnownMinValue();
9490-
for (unsigned Part = 0; Part < State.UF; ++Part)
9491-
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
9492-
State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
9473+
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
9474+
State.ILV->scalarizeInstruction(UI, this, VPIteration(0, Lane), State);
94939475
}
94949476

94959477
// Determine how to lower the scalar epilogue, which depends on 1) optimising

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 8 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
225225
VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
226226
DominatorTree *DT, IRBuilderBase &Builder,
227227
InnerLoopVectorizer *ILV, VPlan *Plan)
228-
: VF(VF), UF(UF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
228+
: VF(VF), CFG(DT), LI(LI), Builder(Builder), ILV(ILV), Plan(Plan),
229229
LVer(nullptr), TypeAnalysis(Plan->getCanonicalIV()->getScalarType()) {}
230230

231231
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
@@ -772,9 +772,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
772772

773773
// Enter replicating mode.
774774
State->Instance = VPIteration(0, 0);
775-
776-
for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
777-
State->Instance->Part = Part;
778775
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
779776
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
780777
++Lane) {
@@ -784,7 +781,6 @@ void VPRegionBlock::execute(VPTransformState *State) {
784781
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
785782
Block->execute(State);
786783
}
787-
}
788784
}
789785

790786
// Exit replicating mode.
@@ -963,16 +959,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
963959
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
964960
// FIXME: Model VF * UF computation completely in VPlan.
965961
assert(VFxUF.getNumUsers() && "VFxUF expected to always have users");
962+
unsigned UF = getUF();
966963
if (VF.getNumUsers()) {
967964
Value *RuntimeVF = getRuntimeVF(Builder, TCTy, State.VF);
968965
VF.setUnderlyingValue(RuntimeVF);
969966
VFxUF.setUnderlyingValue(
970-
State.UF > 1
971-
? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, State.UF))
972-
: RuntimeVF);
967+
UF > 1 ? Builder.CreateMul(RuntimeVF, ConstantInt::get(TCTy, UF))
968+
: RuntimeVF);
973969
} else {
974-
VFxUF.setUnderlyingValue(
975-
createStepForVF(Builder, TCTy, State.VF, State.UF));
970+
VFxUF.setUnderlyingValue(createStepForVF(Builder, TCTy, State.VF, UF));
976971
}
977972

978973
// When vectorizing the epilogue loop, the canonical induction start value
@@ -1019,10 +1014,6 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
10191014
/// Assumes a single pre-header basic-block was created for this. Introduce
10201015
/// additional basic-blocks as needed, and fill them all.
10211016
void VPlan::execute(VPTransformState *State) {
1022-
// Set UF to 1, as the unrollByUF VPlan transform already explicitly unrolled
1023-
// the VPlan.
1024-
// TODO: Remove State::UF and all uses.
1025-
State->UF = 1;
10261017
// Initialize CFG state.
10271018
State->CFG.PrevVPBB = nullptr;
10281019
State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor();
@@ -1106,28 +1097,13 @@ void VPlan::execute(VPTransformState *State) {
11061097
}
11071098

11081099
auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
1109-
// For canonical IV, first-order recurrences and in-order reduction phis,
1110-
// only a single part is generated, which provides the last part from the
1111-
// previous iteration. For non-ordered reductions all UF parts are
1112-
// generated.
1113-
bool SinglePartNeeded =
1114-
isa<VPCanonicalIVPHIRecipe>(PhiR) ||
1115-
isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
1116-
(isa<VPReductionPHIRecipe>(PhiR) &&
1117-
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
11181100
bool NeedsScalar =
11191101
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
11201102
(isa<VPReductionPHIRecipe>(PhiR) &&
11211103
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1122-
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
1123-
1124-
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
1125-
Value *Phi = State->get(PhiR, Part, NeedsScalar);
1126-
Value *Val =
1127-
State->get(PhiR->getBackedgeValue(),
1128-
SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
1129-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
1130-
}
1104+
Value *Phi = State->get(PhiR, 0, NeedsScalar);
1105+
Value *Val = State->get(PhiR->getBackedgeValue(), 0, NeedsScalar);
1106+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
11311107
}
11321108

11331109
State->CFG.DTU.flush();

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,6 @@ struct VPTransformState {
256256

257257
/// The chosen Vectorization Factor of the loop being vectorized.
258258
ElementCount VF;
259-
unsigned UF;
260259

261260
/// Hold the indices to generate specific scalar instructions. Null indicates
262261
/// that all instances are to be generated, using either scalar or vector
@@ -309,7 +308,7 @@ struct VPTransformState {
309308
assert((VF.isScalar() || V->getType()->isVectorTy()) &&
310309
"scalar values must be stored as (Part, 0)");
311310
if (!Data.PerPartOutput.count(Def)) {
312-
DataState::PerPartValuesTy Entry(UF);
311+
DataState::PerPartValuesTy Entry(1);
313312
Data.PerPartOutput[Def] = Entry;
314313
}
315314
Data.PerPartOutput[Def][Part] = V;
@@ -1306,11 +1305,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
13061305
/// needed.
13071306
bool canGenerateScalarForFirstLane() const;
13081307

1309-
/// Utility methods serving execute(): generates a single instance of the
1310-
/// modeled instruction for a given part. \returns the generated value for \p
1311-
/// Part. In some cases an existing value is returned rather than a generated
1312-
/// one.
1313-
Value *generatePerPart(VPTransformState &State, unsigned Part);
1308+
/// Utility methods serving execute(): generates a single vector instance of
1309+
/// the modeled instruction. \returns the generated value. In some cases an
1310+
/// existing value is returned rather than a generated one.
1311+
Value *generate(VPTransformState &State);
13141312

13151313
/// Utility methods serving execute(): generates a scalar single instance of
13161314
/// the modeled instruction for a given lane. \returns the scalar generated
@@ -1616,7 +1614,7 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
16161614

16171615
Type *ResultTy;
16181616

1619-
Value *generate(VPTransformState &State, unsigned Part);
1617+
Value *generate(VPTransformState &State);
16201618

16211619
public:
16221620
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)

0 commit comments

Comments
 (0)