Skip to content

[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe #110974

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPReverseVectorPointerSC:
case VPDef::VPExpandSCEVSC:
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
Expand Down Expand Up @@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Consecutive) {
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
auto *VectorPtr = new VPVectorPointerRecipe(
Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
I->getDebugLoc());
VPSingleDefRecipe *VectorPtr;
if (Reverse)
VectorPtr = new VPReverseVectorPointerRecipe(
Ptr, &Plan.getVF(), getLoadStoreType(I),
GEP ? GEP->isInBounds() : false, I->getDebugLoc());
else
VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
GEP ? GEP->isInBounds() : false,
I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
Expand Down
64 changes: 55 additions & 9 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
Expand Down Expand Up @@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}

Expand Down Expand Up @@ -1785,20 +1787,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexTy for
/// all parts. If IsReverse is true, compute pointers for accessing the input in
/// reverse order per part.
/// A recipe to compute the pointers for widened memory accesses of IndexedTy
/// in reverse order. Operand 0 is the pointer and operand 1 is the VF, as set
/// up by the constructor.
// NOTE(review): presumably the VPUnrollPartAccessor argument (2) is the index
// of the part operand that unrolling appends after {Ptr, VF} -- confirm
// against VPUnrollPartAccessor.
class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<2> {
/// Type handed to the constructor; execute() passes it as the type argument
/// of the GEPs it creates.
Type *IndexedTy;

public:
/// Create the recipe with operands {\p Ptr, \p VF}. \p IsInBounds is stored
/// through GEPFlagsTy and \p DL is forwarded to the base class together with
/// the recipe ID.
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
bool IsInBounds, DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
ArrayRef<VPValue *>({Ptr, VF}),
GEPFlagsTy(IsInBounds), DL),
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)

/// Return the VF operand, i.e. operand 1.
VPValue *getVFValue() { return getOperand(1); }
const VPValue *getVFValue() const { return getOperand(1); }

/// Generate the pointer for the current unroll part; defined out of line.
void execute(VPTransformState &State) override;

/// Returns true for every operand \p Op of the recipe; asserts that \p Op is
/// in fact one of its operands.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
return true;
}

/// Returns true if the recipe only uses the first part of operand \p Op.
// NOTE(review): the two-operand bound matches the constructor's {Ptr, VF}.
// Unrolling appends a part operand to this recipe, so this presumably must
// only be queried before that happens -- confirm with the callers.
bool onlyFirstPartUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
assert(getNumOperands() <= 2 && "must have at most two operands");
return true;
}

/// Create a new recipe from this one's pointer operand, VF operand, indexed
/// type, inbounds flag and debug location.
VPReverseVectorPointerRecipe *clone() override {
return new VPReverseVectorPointerRecipe(
getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
bool IsReverse;

public:
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
bool IsInBounds, DebugLoc DL)
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
IndexedTy(IndexedTy), IsReverse(IsReverse) {}
IndexedTy(IndexedTy) {}

VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)

Expand All @@ -1819,8 +1865,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}

VPVectorPointerRecipe *clone() override {
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
isInBounds(), getDebugLoc());
return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
getDebugLoc());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
[this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
Expand Down
80 changes: 52 additions & 28 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPDerivedIVSC:
case VPPredInstPHISC:
case VPScalarCastSC:
case VPReverseVectorPointerSC:
return false;
case VPInstructionSC:
return mayWriteToMemory();
Expand Down Expand Up @@ -1813,38 +1814,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

void VPVectorPointerRecipe ::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also add VPReverseVectorPointerSC to mayHaveSideEffects & co?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch! Will do it for VPVectorPointerSC separately.

unsigned CurrentPart, IRBuilderBase &Builder) {
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
? DL.getIndexType(Builder.getPtrTy(0))
: Builder.getInt32Ty();
return IsScalable && (IsReverse || CurrentPart > 0)
? DL.getIndexType(Builder.getPtrTy(0))
: Builder.getInt32Ty();
}

void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
CurrentPart, Builder);

// The wide store needs to start at the last vector element.
Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
if (IndexTy != RunTimeVF->getType())
RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
// NumElt = -CurrentPart * RunTimeVF
Value *NumElt = Builder.CreateMul(
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
// LastLane = 1 - RunTimeVF
Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);

Value *ResultPtr = nullptr;
if (IsReverse) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
// RunTimeVF = VScale * VF.getKnownMinValue()
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
// NumElt = -CurrentPart * RunTimeVF
Value *NumElt = Builder.CreateMul(
ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
// LastLane = 1 - RunTimeVF
Value *LastLane =
Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
} else {
Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
}
State.set(this, ResultPtr, /*IsScalar*/ true);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe to \p O: \p Indent, the defined value, the
/// " = reverse-vector-pointer " mnemonic, an optional "inbounds " marker and
/// finally the operands.
void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
                                         VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  // The marker is emitted only when the inbounds flag is set; otherwise an
  // empty string is streamed, which leaves the output unchanged.
  O << " = reverse-vector-pointer " << (isInBounds() ? "inbounds " : "");
  printOperands(O, SlotTracker);
}
#endif

/// Generate the pointer for the current unroll part: a GEP of IndexedTy from
/// lane 0 of the pointer operand, offset by the value createStepForVF returns
/// for this part. The result is recorded in \p State as a scalar.
void VPVectorPointerRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &IRB = State.Builder;

  const unsigned Part = getUnrollPart(*this);
  // Index type selection is shared with the reverse recipe; this is the
  // non-reverse case.
  Type *OffsetTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
                                 Part, IRB);

  // Fetch the base pointer before creating the step so that any IR produced
  // by either call is emitted in the same order as before.
  Value *BasePtr = State.get(getOperand(0), VPLane(0));
  const bool InBounds = isInBounds();
  Value *Step = createStepForVF(IRB, OffsetTy, State.VF, Part);

  State.set(this, IRB.CreateGEP(IndexedTy, BasePtr, Step, "", InBounds),
            /*IsScalar*/ true);
}
Expand All @@ -1855,8 +1881,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent;
printAsOperand(O, SlotTracker);
O << " = vector-pointer ";
if (IsReverse)
O << "(reverse) ";

printOperands(O, SlotTracker);
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
VPVectorPointerRecipe>(Copy) ||
VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
m_VPValue())))
Copy->addOperand(getConstantVPV(Part));

if (isa<VPVectorPointerRecipe>(R))
if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,7 @@ class VPDef {
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
VPReverseVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
Expand Down
Loading
Loading