[LV][VPlan] Use VF VPValue in VPVectorPointerRecipe #110974
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v

Author: Shih-Po Hung (arcbbb)

Changes: Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305. Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe.

Patch is 48.21 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/110974.diff

11 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6e082b1c134dee..220625908dff9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
+ case VPDef::VPReverseVectorPointerSC:
case VPDef::VPExpandSCEVSC:
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Consecutive) {
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
- auto *VectorPtr = new VPVectorPointerRecipe(
- Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
- I->getDebugLoc());
+ VPSingleDefRecipe *VectorPtr;
+ if (Reverse)
+ VectorPtr = new VPReverseVectorPointerRecipe(
+ Ptr, &Plan.getVF(), getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+ else
+ VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false,
+ I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 594492344d43ce..24e066753f9da4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
+ case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1785,20 +1787,65 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
#endif
};
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order per part.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<2> {
+ Type *IndexedTy;
+
+public:
+ VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+ bool IsInBounds, DebugLoc DL)
+ : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+ ArrayRef<VPValue *>({Ptr, VF}),
+ GEPFlagsTy(IsInBounds), DL),
+ IndexedTy(IndexedTy) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+ VPValue *getVFValue() { return getOperand(1); }
+ const VPValue *getVFValue() const { return getOperand(1); }
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ assert(getNumOperands() <= 2 && "must have at most two operands");
+ return true;
+ }
+
+ VPReverseVectorPointerRecipe *clone() override {
+ return new VPReverseVectorPointerRecipe(
+ getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// all parts.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
- bool IsReverse;
public:
- VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
- bool IsInBounds, DebugLoc DL)
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+ DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
- IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1819,8 +1866,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}
VPVectorPointerRecipe *clone() override {
- return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
- isInBounds(), getDebugLoc());
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+ getDebugLoc());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 277df0637372d8..7bfbcc3dff0e1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d092b9c10acc8..bcf7b6d9b64c24 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1813,6 +1813,46 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPReverseVectorPointerRecipe ::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ // Use i32 for the gep index type when the value is constant,
+ // or query DataLayout for a more suitable index type otherwise.
+ const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+ Type *IndexTy = State.VF.isScalable()
+ ? DL.getIndexType(IndexedTy->getPointerTo())
+ : Builder.getInt32Ty();
+ Value *Ptr = State.get(getOperand(0), VPLane(0));
+ bool InBounds = isInBounds();
+
+ // the wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+ // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
+ Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+ if (IndexTy != RunTimeVF->getType())
+ RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+ // NumElt = -CurrentPart * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
+
+ State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = reverse-vector-pointer ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
void VPVectorPointerRecipe ::execute(VPTransformState &State) {
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
@@ -1820,31 +1860,14 @@ void VPVectorPointerRecipe ::execute(VPTransformState &State) {
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
+ Type *IndexTy = State.VF.isScalable() && (CurrentPart > 0)
? DL.getIndexType(Builder.getPtrTy(0))
: Builder.getInt32Ty();
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
- Value *ResultPtr = nullptr;
- if (IsReverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -CurrentPart * RunTimeVF
- Value *NumElt = Builder.CreateMul(
- ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
- }
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -1855,8 +1878,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent;
printAsOperand(O, SlotTracker);
O << " = vector-pointer ";
- if (IsReverse)
- O << "(reverse) ";
printOperands(O, SlotTracker);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..1e32865e8ee576 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorPointerRecipe>(Copy) ||
+ VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
m_VPValue())))
Copy->addOperand(getConstantVPV(Part));
- if (isa<VPVectorPointerRecipe>(R))
+ if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 4c383244f96f1a..d7626e437db338 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -346,6 +346,7 @@ class VPDef {
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
+ VPReverseVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index b6a9fed507acd3..9631dfd5f15913 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -53,13 +53,14 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
-; CHECK-NEXT: Live-in vp<%0> = VF * UF
-; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
-; CHECK-NEXT: vp<%2> = original trip-count
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body.preheader>:
; CHECK-NEXT: IR %0 = zext i32 %n to i64
-; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -67,27 +68,27 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8>
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1>
-; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1>
-; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%5>, ir<-1>
+; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%9>
+; CHECK-NEXT: vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1>
+; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%5>, ir<-1>
+; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%6>, ir<-1>
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<%6> = vector-pointer (reverse) ir<%arrayidx>
-; CHECK-NEXT: WIDEN ir<%1> = load vp<%6>
+; CHECK-NEXT: vp<%7> = reverse-vector-pointer ir<%arrayidx>, vp<%0>
+; CHECK-NEXT: WIDEN ir<%1> = load vp<%7>
; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<%7> = vector-pointer (reverse) ir<%arrayidx3>
-; CHECK-NEXT: WIDEN store vp<%7>, ir<%add9>
-; CHECK-NEXT: EMIT vp<%8> = add nuw vp<%3>, vp<%0>
-; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1>
+; CHECK-NEXT: vp<%8> = reverse-vector-pointer ir<%arrayidx3>, vp<%0>
+; CHECK-NEXT: WIDEN store vp<%8>, ir<%add9>
+; CHECK-NEXT: EMIT vp<%9> = add nuw vp<%4>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%9>, vp<%2>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%10> = icmp eq vp<%2>, vp<%1>
-; CHECK-NEXT: EMIT branch-on-cond vp<%10>
+; CHECK-NEXT: EMIT vp<%11> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%11>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
@@ -137,13 +138,14 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop
; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1
; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
-; CHECK-NEXT: Live-in vp<%0> = VF * UF
-; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
-; CHECK-NEXT: vp<%2> = original trip-count
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body.preheader>:
; CHECK-NEXT: IR %0 = zext i32 %n to i64
-; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -151,27 +153,27 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8>
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1>
-; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1>
-; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%5>, ir<-1>
+; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%9>
+; CHECK-NEXT: vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1>
+; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%5>, ir<-1>
+; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%6>, ir<-1>
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT: vp<%6> = vector-pointer (reverse) ir<%arrayidx>
-; CHECK-NEXT: WIDEN ir<%13> = load vp<%6>
+; CHECK-NEXT: vp<%7> = reverse-vector-pointer ir<%arrayidx>, vp<%0>
+; CHECK-NEXT: WIDEN ir<%13> = load vp<%7>
; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1>
; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT: vp<%7> = vector-pointer (reverse) ir<%arrayidx3>
-; CHECK-NEXT: WIDEN store vp<%7>, ir<%add9>
-; CHECK-NEXT: EMIT vp<%8> = add nuw vp<%3>, vp<%0>
-; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%1>
+; CHECK-NEXT: vp<%8> = reverse-vector-pointer ir<%arrayidx3>, vp<%0>
+; CHECK-NEXT: WIDEN store vp<%8>, ir<%add9>
+; CHECK-NEXT: EMIT vp<%9> = add nuw vp<%4>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%9>, vp<%2>
; CHECK-NEXT: No successors
; CHECK-NEXT: }
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<%10> = icmp eq vp<%2>, vp<%1>
-; CHECK-NEXT: EMIT branch-on-cond vp<%10>
+; CHECK-NEXT: EMIT vp<%11> = icmp eq vp<%3>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%11>
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, scalar.ph
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.cond.cleanup.loopexit>:
@@ -257,13 +259,14 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
-; CHECK-NEXT: Live-in vp<%0> = VF * UF
-; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
-; CHECK-NEXT: vp<%2> = original trip-count
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: vp<%3> = original trip-count
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body.preheader>:
; CHECK-NEXT: IR %0 = zext i32 %n to i64
-; CHECK-NEXT: EMIT vp<%2> = EXPAND SCEV (zext i32 %n to i64)
+; CHECK-NEXT: EMIT vp<%3> = EXPAND SCEV (zext i32 %n to i64)
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: vector.ph:
@@ -271,27 +274,27 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
-; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%8>
-; CHECK-NEXT: vp<%4> = DERIVED-IV ir<%n> + vp<%3> * ir<-1>
-; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<-1>
-; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<...
[truncated]
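To make the new recipe's address arithmetic concrete, here is a minimal standalone sketch (plain C++, not LLVM code; all names are illustrative) of the index computation that `VPReverseVectorPointerRecipe::execute` emits as two GEPs in the hunk above:

```cpp
#include <cstdint>
#include <cstdio>

// For unroll part `Part` and runtime vector length `RunTimeVF`, a reverse
// wide access must start at the lowest-address lane of that part, walking
// backwards from the base pointer:
//   NumElt   = -Part * RunTimeVF   (skip `Part` whole vectors backwards)
//   LastLane = 1 - RunTimeVF       (step back to the first-loaded lane)
int64_t reverseVectorStartIndex(int64_t Part, int64_t RunTimeVF) {
  int64_t NumElt = -Part * RunTimeVF;
  int64_t LastLane = 1 - RunTimeVF;
  return NumElt + LastLane; // combined offset of the two GEPs, in elements
}

int main() {
  // E.g. a fixed VF of 4 with three parts: starts at elements -3, -7, -11,
  // so each 4-element load covers [start, start + 3] and is then reversed.
  for (int64_t Part = 0; Part < 3; ++Part)
    std::printf("part %lld starts at element %lld\n", (long long)Part,
                (long long)reverseVectorStartIndex(Part, 4));
  return 0;
}
```

The only behavioral change in this PR is where `RunTimeVF` comes from: it is now read from the VF live-in VPValue (operand 1 of the new recipe) instead of being materialized via `getRuntimeVF`.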
Force-pushed from 5eb4fd0 to 24ec15c.
; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%9>
; CHECK-NEXT: vp<%5> = DERIVED-IV ir<%n> + vp<%4> * ir<-1>
; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%5>, ir<-1>
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<%6>, ir<-1>
Would it be possible to update the test to use patterns for the VPValue numbers separately, to avoid the unrelated changes?
Sure, separated it into #111086.
Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to llvm#95305. Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe.
Force-pushed from bd2a773 to 9f1c301.
Rebased to avoid unrelated changes on tests.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
Type *IndexTy = State.VF.isScalable()
                    ? DL.getIndexType(IndexedTy->getPointerTo())
                    : Builder.getInt32Ty();
Might be good to share this code between both vector pointer recipes?
Sure, I moved them to getGEPIndexTy().
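For reference, a plausible shape of that shared helper, reconstructed from the hunks quoted in this review thread (the exact definition lands in VPlanRecipes.cpp and is not reproduced in full on this page):

```cpp
// Reconstructed sketch, not copied from the final patch: use i32 for the
// GEP index type when the step is a compile-time constant, otherwise ask
// DataLayout for the pointer-sized index type. Reverse pointers always
// need the runtime type when VF is scalable; forward pointers only for
// parts > 0 (part 0 uses a constant step).
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
                           unsigned CurrentPart, IRBuilderBase &Builder) {
  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
  return IsScalable && (IsReverse || CurrentPart > 0)
             ? DL.getIndexType(Builder.getPtrTy(0))
             : Builder.getInt32Ty();
}
```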
Co-authored-by: Florian Hahn <[email protected]>
- refine stale comments
- sink to use
- add inbounds in print() (see the sketch below)
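A sketch of the last bullet, assuming the inbounds bit is read through the inherited isInBounds() accessor already used in execute(); the exact printed token is an assumption here:

```cpp
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
                                         VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  // Newly printed flag: make inbounds GEP-based pointers visible in dumps.
  O << " = reverse-vector-pointer";
  if (isInBounds())
    O << " inbounds";
  O << " ";
  printOperands(O, SlotTracker);
}
#endif
```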
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
unsigned CurrentPart = getUnrollPart(*this);
static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, |
Could you also add VPReverseVectorPointerSC to mayHaveSideEffects & co?
Nice catch! Will do it for VPVectorPointerSC separately.
LGTM, thanks. But should probably fold the mayHaveSideEffects change from #113410 into this PR.
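The follow-up being asked for would look roughly like this in VPRecipeBase::mayHaveSideEffects (a sketch of the requested change, not the landed code; the surrounding cases are abbreviated and their exact placement is an assumption):

```cpp
bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  // ... other recipe IDs elided ...
  case VPScalarIVStepsSC:
  case VPVectorPointerSC:        // the #113410 change being folded in
  case VPReverseVectorPointerSC: // pure address computation, no side effects
    return false;
  // ... remaining cases elided ...
  default:
    return true;
  }
}
```

Classifying both pointer recipes as side-effect free lets transforms such as dead-recipe elimination and sinking treat them like the other address computations listed alongside them.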
LLVM Buildbot has detected a new failure. Full details are available at: https://lab.llvm.org/buildbot/#/builders/4/builds/3122