Skip to content

Commit 24ec15c

Browse files
committed
[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305. Since only reverse vector pointers require the runtime VF as an extra operand, the patch sets the part operand index of VPUnrollPartAccessor to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe, VPReverseVectorPointerRecipe.
1 parent 6c331e5 commit 24ec15c

12 files changed

+314
-256
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
44424442
case VPDef::VPInstructionSC:
44434443
case VPDef::VPCanonicalIVPHISC:
44444444
case VPDef::VPVectorPointerSC:
4445+
case VPDef::VPReverseVectorPointerSC:
44454446
case VPDef::VPExpandSCEVSC:
44464447
case VPDef::VPEVLBasedIVPHISC:
44474448
case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
81608161
if (Consecutive) {
81618162
auto *GEP = dyn_cast<GetElementPtrInst>(
81628163
Ptr->getUnderlyingValue()->stripPointerCasts());
8163-
auto *VectorPtr = new VPVectorPointerRecipe(
8164-
Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
8165-
I->getDebugLoc());
8164+
VPSingleDefRecipe *VectorPtr;
8165+
if (Reverse)
8166+
VectorPtr = new VPReverseVectorPointerRecipe(
8167+
Ptr, &Plan.getVF(), getLoadStoreType(I),
8168+
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
8169+
else
8170+
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
8171+
GEP ? GEP->isInBounds() : false,
8172+
I->getDebugLoc());
81668173
Builder.getInsertBlock()->appendRecipe(VectorPtr);
81678174
Ptr = VectorPtr;
81688175
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
882882
case VPRecipeBase::VPReplicateSC:
883883
case VPRecipeBase::VPScalarIVStepsSC:
884884
case VPRecipeBase::VPVectorPointerSC:
885+
case VPRecipeBase::VPReverseVectorPointerSC:
885886
case VPRecipeBase::VPWidenCallSC:
886887
case VPRecipeBase::VPWidenCanonicalIVSC:
887888
case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10781079
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10791080
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10801081
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1082+
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
10811083
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
10821084
}
10831085

@@ -1785,20 +1787,65 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
17851787
#endif
17861788
};
17871789

1790+
/// A recipe to compute the pointers for widened memory accesses of IndexedTy
1791+
/// in reverse order per part.
1792+
class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
1793+
public VPUnrollPartAccessor<2> {
1794+
Type *IndexedTy;
1795+
1796+
public:
1797+
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
1798+
bool IsInBounds, DebugLoc DL)
1799+
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1800+
ArrayRef<VPValue *>({Ptr, VF}),
1801+
GEPFlagsTy(IsInBounds), DL),
1802+
IndexedTy(IndexedTy) {}
1803+
1804+
VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1805+
1806+
VPValue *getVFValue() { return getOperand(1); }
1807+
const VPValue *getVFValue() const { return getOperand(1); }
1808+
1809+
void execute(VPTransformState &State) override;
1810+
1811+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1812+
assert(is_contained(operands(), Op) &&
1813+
"Op must be an operand of the recipe");
1814+
return true;
1815+
}
1816+
1817+
/// Returns true if the recipe only uses the first part of operand \p Op.
1818+
bool onlyFirstPartUsed(const VPValue *Op) const override {
1819+
assert(is_contained(operands(), Op) &&
1820+
"Op must be an operand of the recipe");
1821+
assert(getNumOperands() <= 2 && "must have at most two operands");
1822+
return true;
1823+
}
1824+
1825+
VPReverseVectorPointerRecipe *clone() override {
1826+
return new VPReverseVectorPointerRecipe(
1827+
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
1828+
}
1829+
1830+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1831+
/// Print the recipe.
1832+
void print(raw_ostream &O, const Twine &Indent,
1833+
VPSlotTracker &SlotTracker) const override;
1834+
#endif
1835+
};
1836+
17881837
/// A recipe to compute the pointers for widened memory accesses of IndexedTy for
1789-
/// all parts. If IsReverse is true, compute pointers for accessing the input in
1790-
/// reverse order per part.
1838+
/// all parts.
17911839
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
17921840
public VPUnrollPartAccessor<1> {
17931841
Type *IndexedTy;
1794-
bool IsReverse;
17951842

17961843
public:
1797-
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1798-
bool IsInBounds, DebugLoc DL)
1844+
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
1845+
DebugLoc DL)
17991846
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
18001847
GEPFlagsTy(IsInBounds), DL),
1801-
IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1848+
IndexedTy(IndexedTy) {}
18021849

18031850
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
18041851

@@ -1819,8 +1866,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
18191866
}
18201867

18211868
VPVectorPointerRecipe *clone() override {
1822-
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1823-
isInBounds(), getDebugLoc());
1869+
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
1870+
getDebugLoc());
18241871
}
18251872

18261873
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
261261
[](const auto *R) { return R->getScalarType(); })
262262
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
263263
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
264-
VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
265-
return inferScalarType(R->getOperand(0));
266-
})
264+
VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
265+
[this](const VPRecipeBase *R) {
266+
return inferScalarType(R->getOperand(0));
267+
})
267268
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
268269
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
269270
VPWidenSelectRecipe>(

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 43 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,38 +1813,61 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
18131813
}
18141814
#endif
18151815

1816+
void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) {
1817+
auto &Builder = State.Builder;
1818+
State.setDebugLocFrom(getDebugLoc());
1819+
unsigned CurrentPart = getUnrollPart(*this);
1820+
// Use i32 for the gep index type when the value is constant,
1821+
// or query DataLayout for a more suitable index type otherwise.
1822+
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1823+
Type *IndexTy = State.VF.isScalable()
1824+
? DL.getIndexType(IndexedTy->getPointerTo())
1825+
: Builder.getInt32Ty();
1826+
Value *Ptr = State.get(getOperand(0), VPLane(0));
1827+
bool InBounds = isInBounds();
1828+
1829+
// The wide store needs to start at the last vector element.
1830+
// RunTimeVF = VScale * VF.getKnownMinValue()
1831+
// For fixed-width vectors VScale is 1, so RunTimeVF = VF.getKnownMinValue().
1832+
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1833+
if (IndexTy != RunTimeVF->getType())
1834+
RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1835+
// NumElt = -CurrentPart * RunTimeVF
1836+
Value *NumElt = Builder.CreateMul(
1837+
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1838+
// LastLane = 1 - RunTimeVF
1839+
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1840+
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1841+
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
1842+
1843+
State.set(this, ResultPtr, /*IsScalar*/ true);
1844+
}
1845+
1846+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1847+
void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
1848+
VPSlotTracker &SlotTracker) const {
1849+
O << Indent;
1850+
printAsOperand(O, SlotTracker);
1851+
O << " = reverse-vector-pointer ";
1852+
printOperands(O, SlotTracker);
1853+
}
1854+
#endif
1855+
18161856
void VPVectorPointerRecipe ::execute(VPTransformState &State) {
18171857
auto &Builder = State.Builder;
18181858
State.setDebugLocFrom(getDebugLoc());
18191859
unsigned CurrentPart = getUnrollPart(*this);
18201860
// Use i32 for the gep index type when the value is constant,
18211861
// or query DataLayout for a more suitable index type otherwise.
18221862
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1823-
Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
1863+
Type *IndexTy = State.VF.isScalable() && (CurrentPart > 0)
18241864
? DL.getIndexType(Builder.getPtrTy(0))
18251865
: Builder.getInt32Ty();
18261866
Value *Ptr = State.get(getOperand(0), VPLane(0));
18271867
bool InBounds = isInBounds();
18281868

1829-
Value *ResultPtr = nullptr;
1830-
if (IsReverse) {
1831-
// If the address is consecutive but reversed, then the
1832-
// wide store needs to start at the last vector element.
1833-
// RunTimeVF = VScale * VF.getKnownMinValue()
1834-
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1835-
Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1836-
// NumElt = -CurrentPart * RunTimeVF
1837-
Value *NumElt = Builder.CreateMul(
1838-
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1839-
// LastLane = 1 - RunTimeVF
1840-
Value *LastLane =
1841-
Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
1842-
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
1843-
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
1844-
} else {
1845-
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
1846-
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
1847-
}
1869+
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
1870+
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
18481871

18491872
State.set(this, ResultPtr, /*IsScalar*/ true);
18501873
}
@@ -1855,8 +1878,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
18551878
O << Indent;
18561879
printAsOperand(O, SlotTracker);
18571880
O << " = vector-pointer ";
1858-
if (IsReverse)
1859-
O << "(reverse) ";
18601881

18611882
printOperands(O, SlotTracker);
18621883
}

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
316316
// Add operand indicating the part to generate code for, to recipes still
317317
// requiring it.
318318
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
319-
VPVectorPointerRecipe>(Copy) ||
319+
VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
320320
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
321321
m_VPValue())))
322322
Copy->addOperand(getConstantVPV(Part));
323323

324-
if (isa<VPVectorPointerRecipe>(R))
324+
if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
325325
Copy->setOperand(0, R.getOperand(0));
326326
}
327327
}

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ class VPDef {
346346
VPScalarCastSC,
347347
VPScalarIVStepsSC,
348348
VPVectorPointerSC,
349+
VPReverseVectorPointerSC,
349350
VPWidenCallSC,
350351
VPWidenCanonicalIVSC,
351352
VPWidenCastSC,

0 commit comments

Comments
 (0)