Skip to content

Commit 266ff98

Browse files
authored
[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe (#110974)
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305. Since only reverse vector pointers require the runtime VF, the patch sets the PartOpIndex of VPUnrollPartAccessor to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe, VPReverseVectorPointerRecipe.
1 parent 0f4b3c4 commit 266ff98

12 files changed

+267
-207
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4492,6 +4492,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
44924492
case VPDef::VPInstructionSC:
44934493
case VPDef::VPCanonicalIVPHISC:
44944494
case VPDef::VPVectorPointerSC:
4495+
case VPDef::VPReverseVectorPointerSC:
44954496
case VPDef::VPExpandSCEVSC:
44964497
case VPDef::VPEVLBasedIVPHISC:
44974498
case VPDef::VPPredInstPHISC:
@@ -8278,9 +8279,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
82788279
if (Consecutive) {
82798280
auto *GEP = dyn_cast<GetElementPtrInst>(
82808281
Ptr->getUnderlyingValue()->stripPointerCasts());
8281-
auto *VectorPtr = new VPVectorPointerRecipe(
8282-
Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
8283-
I->getDebugLoc());
8282+
VPSingleDefRecipe *VectorPtr;
8283+
if (Reverse)
8284+
VectorPtr = new VPReverseVectorPointerRecipe(
8285+
Ptr, &Plan.getVF(), getLoadStoreType(I),
8286+
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
8287+
else
8288+
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
8289+
GEP ? GEP->isInBounds() : false,
8290+
I->getDebugLoc());
82848291
Builder.getInsertBlock()->appendRecipe(VectorPtr);
82858292
Ptr = VectorPtr;
82868293
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
905905
case VPRecipeBase::VPReplicateSC:
906906
case VPRecipeBase::VPScalarIVStepsSC:
907907
case VPRecipeBase::VPVectorPointerSC:
908+
case VPRecipeBase::VPReverseVectorPointerSC:
908909
case VPRecipeBase::VPWidenCallSC:
909910
case VPRecipeBase::VPWidenCanonicalIVSC:
910911
case VPRecipeBase::VPWidenCastSC:
@@ -1110,6 +1111,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
11101111
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
11111112
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
11121113
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1114+
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
11131115
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
11141116
}
11151117

@@ -1910,20 +1912,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
19101912
#endif
19111913
};
19121914

1913-
/// A recipe to compute the pointers for widened memory accesses of IndexTy for
1914-
/// all parts. If IsReverse is true, compute pointers for accessing the input in
1915-
/// reverse order per part.
1915+
/// A recipe to compute the pointers for widened memory accesses of IndexedTy
1916+
/// in reverse order.
1917+
class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
1918+
public VPUnrollPartAccessor<2> {
1919+
Type *IndexedTy;
1920+
1921+
public:
1922+
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
1923+
bool IsInBounds, DebugLoc DL)
1924+
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1925+
ArrayRef<VPValue *>({Ptr, VF}),
1926+
GEPFlagsTy(IsInBounds), DL),
1927+
IndexedTy(IndexedTy) {}
1928+
1929+
VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1930+
1931+
VPValue *getVFValue() { return getOperand(1); }
1932+
const VPValue *getVFValue() const { return getOperand(1); }
1933+
1934+
void execute(VPTransformState &State) override;
1935+
1936+
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1937+
assert(is_contained(operands(), Op) &&
1938+
"Op must be an operand of the recipe");
1939+
return true;
1940+
}
1941+
1942+
/// Returns true if the recipe only uses the first part of operand \p Op.
1943+
bool onlyFirstPartUsed(const VPValue *Op) const override {
1944+
assert(is_contained(operands(), Op) &&
1945+
"Op must be an operand of the recipe");
1946+
assert(getNumOperands() <= 2 && "must have at most two operands");
1947+
return true;
1948+
}
1949+
1950+
VPReverseVectorPointerRecipe *clone() override {
1951+
return new VPReverseVectorPointerRecipe(
1952+
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
1953+
}
1954+
1955+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1956+
/// Print the recipe.
1957+
void print(raw_ostream &O, const Twine &Indent,
1958+
VPSlotTracker &SlotTracker) const override;
1959+
#endif
1960+
};
1961+
1962+
/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
19161963
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
19171964
public VPUnrollPartAccessor<1> {
19181965
Type *IndexedTy;
1919-
bool IsReverse;
19201966

19211967
public:
1922-
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1923-
bool IsInBounds, DebugLoc DL)
1968+
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
1969+
DebugLoc DL)
19241970
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
19251971
GEPFlagsTy(IsInBounds), DL),
1926-
IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1972+
IndexedTy(IndexedTy) {}
19271973

19281974
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
19291975

@@ -1944,8 +1990,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
19441990
}
19451991

19461992
VPVectorPointerRecipe *clone() override {
1947-
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1948-
isInBounds(), getDebugLoc());
1993+
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
1994+
getDebugLoc());
19491995
}
19501996

19511997
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
263263
[](const auto *R) { return R->getScalarType(); })
264264
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
265265
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
266-
VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
267-
return inferScalarType(R->getOperand(0));
268-
})
266+
VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
267+
[this](const VPRecipeBase *R) {
268+
return inferScalarType(R->getOperand(0));
269+
})
269270
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
270271
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
271272
VPWidenSelectRecipe>(

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
162162
case VPDerivedIVSC:
163163
case VPPredInstPHISC:
164164
case VPScalarCastSC:
165+
case VPReverseVectorPointerSC:
165166
return false;
166167
case VPInstructionSC:
167168
return mayWriteToMemory();
@@ -1971,38 +1972,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
19711972
}
19721973
#endif
19731974

1974-
void VPVectorPointerRecipe ::execute(VPTransformState &State) {
1975-
auto &Builder = State.Builder;
1976-
State.setDebugLocFrom(getDebugLoc());
1977-
unsigned CurrentPart = getUnrollPart(*this);
1975+
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
1976+
unsigned CurrentPart, IRBuilderBase &Builder) {
19781977
// Use i32 for the gep index type when the value is constant,
19791978
// or query DataLayout for a more suitable index type otherwise.
19801979
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
1981-
Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
1982-
? DL.getIndexType(Builder.getPtrTy(0))
1983-
: Builder.getInt32Ty();
1980+
return IsScalable && (IsReverse || CurrentPart > 0)
1981+
? DL.getIndexType(Builder.getPtrTy(0))
1982+
: Builder.getInt32Ty();
1983+
}
1984+
1985+
void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
1986+
auto &Builder = State.Builder;
1987+
State.setDebugLocFrom(getDebugLoc());
1988+
unsigned CurrentPart = getUnrollPart(*this);
1989+
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
1990+
CurrentPart, Builder);
1991+
1992+
// The wide memory access (load or store) needs to start at the last vector
// element.
1993+
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
1994+
if (IndexTy != RunTimeVF->getType())
1995+
RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
1996+
// NumElt = -CurrentPart * RunTimeVF
1997+
Value *NumElt = Builder.CreateMul(
1998+
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1999+
// LastLane = 1 - RunTimeVF
2000+
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
19842001
Value *Ptr = State.get(getOperand(0), VPLane(0));
19852002
bool InBounds = isInBounds();
2003+
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
2004+
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
19862005

1987-
Value *ResultPtr = nullptr;
1988-
if (IsReverse) {
1989-
// If the address is consecutive but reversed, then the
1990-
// wide store needs to start at the last vector element.
1991-
// RunTimeVF = VScale * VF.getKnownMinValue()
1992-
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
1993-
Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
1994-
// NumElt = -CurrentPart * RunTimeVF
1995-
Value *NumElt = Builder.CreateMul(
1996-
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
1997-
// LastLane = 1 - RunTimeVF
1998-
Value *LastLane =
1999-
Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
2000-
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
2001-
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
2002-
} else {
2003-
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2004-
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
2005-
}
2006+
State.set(this, ResultPtr, /*IsScalar*/ true);
2007+
}
2008+
2009+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2010+
void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
2011+
VPSlotTracker &SlotTracker) const {
2012+
O << Indent;
2013+
printAsOperand(O, SlotTracker);
2014+
O << " = reverse-vector-pointer ";
2015+
if (isInBounds())
2016+
O << "inbounds ";
2017+
printOperands(O, SlotTracker);
2018+
}
2019+
#endif
2020+
2021+
void VPVectorPointerRecipe::execute(VPTransformState &State) {
2022+
auto &Builder = State.Builder;
2023+
State.setDebugLocFrom(getDebugLoc());
2024+
unsigned CurrentPart = getUnrollPart(*this);
2025+
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
2026+
CurrentPart, Builder);
2027+
Value *Ptr = State.get(getOperand(0), VPLane(0));
2028+
bool InBounds = isInBounds();
2029+
2030+
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
2031+
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
20062032

20072033
State.set(this, ResultPtr, /*IsScalar*/ true);
20082034
}
@@ -2013,8 +2039,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
20132039
O << Indent;
20142040
printAsOperand(O, SlotTracker);
20152041
O << " = vector-pointer ";
2016-
if (IsReverse)
2017-
O << "(reverse) ";
20182042

20192043
printOperands(O, SlotTracker);
20202044
}

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
316316
// Add operand indicating the part to generate code for, to recipes still
317317
// requiring it.
318318
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
319-
VPVectorPointerRecipe>(Copy) ||
319+
VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
320320
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
321321
m_VPValue())))
322322
Copy->addOperand(getConstantVPV(Part));
323323

324-
if (isa<VPVectorPointerRecipe>(R))
324+
if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
325325
Copy->setOperand(0, R.getOperand(0));
326326
}
327327
}

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ class VPDef {
346346
VPScalarCastSC,
347347
VPScalarIVStepsSC,
348348
VPVectorPointerSC,
349+
VPReverseVectorPointerSC,
349350
VPWidenCallSC,
350351
VPWidenCanonicalIVSC,
351352
VPWidenCastSC,

0 commit comments

Comments
 (0)