Skip to content

Commit ee6690a

Browse files
committed
[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to llvm#95305. Since only reverse vector pointers require the runtime VF, the patch sets the unroll-part operand index (the VPUnrollPartAccessor template argument) to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe, VPReverseVectorPointerRecipe.
1 parent 26fca72 commit ee6690a

12 files changed

+267
-207
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
44424442
case VPDef::VPInstructionSC:
44434443
case VPDef::VPCanonicalIVPHISC:
44444444
case VPDef::VPVectorPointerSC:
4445+
case VPDef::VPReverseVectorPointerSC:
44454446
case VPDef::VPExpandSCEVSC:
44464447
case VPDef::VPEVLBasedIVPHISC:
44474448
case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
81608161
if (Consecutive) {
81618162
auto *GEP = dyn_cast<GetElementPtrInst>(
81628163
Ptr->getUnderlyingValue()->stripPointerCasts());
8163-
auto *VectorPtr = new VPVectorPointerRecipe(
8164-
Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
8165-
I->getDebugLoc());
8164+
VPSingleDefRecipe *VectorPtr;
8165+
if (Reverse)
8166+
VectorPtr = new VPReverseVectorPointerRecipe(
8167+
Ptr, &Plan.getVF(), getLoadStoreType(I),
8168+
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
8169+
else
8170+
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
8171+
GEP ? GEP->isInBounds() : false,
8172+
I->getDebugLoc());
81668173
Builder.getInsertBlock()->appendRecipe(VectorPtr);
81678174
Ptr = VectorPtr;
81688175
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
882882
case VPRecipeBase::VPReplicateSC:
883883
case VPRecipeBase::VPScalarIVStepsSC:
884884
case VPRecipeBase::VPVectorPointerSC:
885+
case VPRecipeBase::VPReverseVectorPointerSC:
885886
case VPRecipeBase::VPWidenCallSC:
886887
case VPRecipeBase::VPWidenCanonicalIVSC:
887888
case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10781079
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10791080
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10801081
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1082+
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
10811083
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
10821084
}
10831085

@@ -1785,20 +1787,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
17851787
#endif
17861788
};
17871789

1788-
/// A recipe to compute the pointers for widened memory accesses of IndexTy for
1789-
/// all parts. If IsReverse is true, compute pointers for accessing the input in
1790-
/// reverse order per part.
1790+
/// A recipe to compute the pointers for widened memory accesses of IndexTy
1791+
/// in reverse order.
1792+
class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
1793+
public VPUnrollPartAccessor<2> {
1794+
Type *IndexedTy;
1795+
1796+
public:
1797+
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
1798+
bool IsInBounds, DebugLoc DL)
1799+
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1800+
ArrayRef<VPValue *>({Ptr, VF}),
1801+
GEPFlagsTy(IsInBounds), DL),
1802+
IndexedTy(IndexedTy) {}
1803+
1804+
VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1805+
1806+
VPValue *getVFValue() { return getOperand(1); }
1807+
const VPValue *getVFValue() const { return getOperand(1); }
1808+
1809+
void execute(VPTransformState &State) override;
1810+
1811+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1812+
assert(is_contained(operands(), Op) &&
1813+
"Op must be an operand of the recipe");
1814+
return true;
1815+
}
1816+
1817+
/// Returns true if the recipe only uses the first part of operand \p Op.
1818+
bool onlyFirstPartUsed(const VPValue *Op) const override {
1819+
assert(is_contained(operands(), Op) &&
1820+
"Op must be an operand of the recipe");
1821+
assert(getNumOperands() <= 2 && "must have at most two operands");
1822+
return true;
1823+
}
1824+
1825+
VPReverseVectorPointerRecipe *clone() override {
1826+
return new VPReverseVectorPointerRecipe(
1827+
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
1828+
}
1829+
1830+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1831+
/// Print the recipe.
1832+
void print(raw_ostream &O, const Twine &Indent,
1833+
VPSlotTracker &SlotTracker) const override;
1834+
#endif
1835+
};
1836+
1837+
/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
17911838
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
17921839
public VPUnrollPartAccessor<1> {
17931840
Type *IndexedTy;
1794-
bool IsReverse;
17951841

17961842
public:
1797-
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1798-
bool IsInBounds, DebugLoc DL)
1843+
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
1844+
DebugLoc DL)
17991845
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
18001846
GEPFlagsTy(IsInBounds), DL),
1801-
IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1847+
IndexedTy(IndexedTy) {}
18021848

18031849
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
18041850

@@ -1819,8 +1865,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
18191865
}
18201866

18211867
VPVectorPointerRecipe *clone() override {
1822-
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1823-
isInBounds(), getDebugLoc());
1868+
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
1869+
getDebugLoc());
18241870
}
18251871

18261872
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
261261
[](const auto *R) { return R->getScalarType(); })
262262
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
263263
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
264-
VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
265-
return inferScalarType(R->getOperand(0));
266-
})
264+
VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
265+
[this](const VPRecipeBase *R) {
266+
return inferScalarType(R->getOperand(0));
267+
})
267268
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
268269
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
269270
VPWidenSelectRecipe>(

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
154154
case VPDerivedIVSC:
155155
case VPPredInstPHISC:
156156
case VPScalarCastSC:
157+
case VPReverseVectorPointerSC:
157158
return false;
158159
case VPInstructionSC:
159160
return mayWriteToMemory();
@@ -1813,38 +1814,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
18131814
}
18141815
#endif
18151816

1816-
void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1817-
auto &Builder = State.Builder;
1818-
State.setDebugLocFrom(getDebugLoc());
1819-
unsigned CurrentPart = getUnrollPart(*this);
1817+
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
1818+
unsigned CurrentPart, IRBuilderBase &Builder) {
18201819
// Use i32 for the gep index type when the value is constant,
18211820
// or query DataLayout for a more suitable index type otherwise.
18221821
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1823-
Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
1824-
? DL.getIndexType(Builder.getPtrTy(0))
1825-
: Builder.getInt32Ty();
1822+
return IsScalable && (IsReverse || CurrentPart > 0)
1823+
? DL.getIndexType(Builder.getPtrTy(0))
1824+
: Builder.getInt32Ty();
1825+
}
1826+
1827+
void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
1828+
auto &Builder = State.Builder;
1829+
State.setDebugLocFrom(getDebugLoc());
1830+
unsigned CurrentPart = getUnrollPart(*this);
1831+
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
1832+
CurrentPart, Builder);
1833+
1834+
// The wide store needs to start at the last vector element.
1835+
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1836+
if (IndexTy != RunTimeVF->getType())
1837+
RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1838+
// NumElt = -CurrentPart * RunTimeVF
1839+
Value *NumElt = Builder.CreateMul(
1840+
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1841+
// LastLane = 1 - RunTimeVF
1842+
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
18261843
Value *Ptr = State.get(getOperand(0), VPLane(0));
18271844
bool InBounds = isInBounds();
1845+
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1846+
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
18281847

1829-
Value *ResultPtr = nullptr;
1830-
if (IsReverse) {
1831-
// If the address is consecutive but reversed, then the
1832-
// wide store needs to start at the last vector element.
1833-
// RunTimeVF = VScale * VF.getKnownMinValue()
1834-
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1835-
Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1836-
// NumElt = -CurrentPart * RunTimeVF
1837-
Value *NumElt = Builder.CreateMul(
1838-
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1839-
// LastLane = 1 - RunTimeVF
1840-
Value *LastLane =
1841-
Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1842-
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1843-
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
1844-
} else {
1845-
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
1846-
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1847-
}
1848+
State.set(this, ResultPtr, /*IsScalar*/ true);
1849+
}
1850+
1851+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1852+
/// Writes "<indent><result> = reverse-vector-pointer [inbounds ]<operands>"
/// to \p O, using \p SlotTracker to name the result and the operands.
void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
                                         VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  // The "inbounds " marker is only emitted when the flag is set.
  O << " = reverse-vector-pointer " << (isInBounds() ? "inbounds " : "");
  printOperands(O, SlotTracker);
}
1861+
#endif
1862+
1863+
/// Emits the pointer for this recipe's unroll part: a GEP over IndexedTy from
/// the lane-0 value of operand 0, offset by the step that createStepForVF
/// produces for the part. The result is recorded in \p State as a scalar.
void VPVectorPointerRecipe::execute(VPTransformState &State) {
  auto &IRB = State.Builder;
  State.setDebugLocFrom(getDebugLoc());

  const unsigned Part = getUnrollPart(*this);
  // Index type for the GEP offset; the choice depends on whether the VF is
  // scalable and on the part being generated.
  Type *const StepTy = getGEPIndexTy(State.VF.isScalable(),
                                     /*IsReverse*/ false, Part, IRB);

  Value *const BasePtr = State.get(getOperand(0), VPLane(0));
  Value *const Step = createStepForVF(IRB, StepTy, State.VF, Part);
  Value *const PartPtr =
      IRB.CreateGEP(IndexedTy, BasePtr, Step, "", isInBounds());

  State.set(this, PartPtr, /*IsScalar*/ true);
}
@@ -1855,8 +1881,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
18551881
O << Indent;
18561882
printAsOperand(O, SlotTracker);
18571883
O << " = vector-pointer ";
1858-
if (IsReverse)
1859-
O << "(reverse) ";
18601884

18611885
printOperands(O, SlotTracker);
18621886
}

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
316316
// Add operand indicating the part to generate code for, to recipes still
317317
// requiring it.
318318
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
319-
VPVectorPointerRecipe>(Copy) ||
319+
VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
320320
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
321321
m_VPValue())))
322322
Copy->addOperand(getConstantVPV(Part));
323323

324-
if (isa<VPVectorPointerRecipe>(R))
324+
if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
325325
Copy->setOperand(0, R.getOperand(0));
326326
}
327327
}

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ class VPDef {
346346
VPScalarCastSC,
347347
VPScalarIVStepsSC,
348348
VPVectorPointerSC,
349+
VPReverseVectorPointerSC,
349350
VPWidenCallSC,
350351
VPWidenCanonicalIVSC,
351352
VPWidenCastSC,

0 commit comments

Comments
 (0)