-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Refine VPVectorPointer printing for inbounds #113410
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Shih-Po Hung (arcbbb) ChangesFollowing #110974, update VPVectorPointerRecipe as well. Patch is 102.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113410.diff 23 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6e082b1c134dee..220625908dff9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
+ case VPDef::VPReverseVectorPointerSC:
case VPDef::VPExpandSCEVSC:
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Consecutive) {
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
- auto *VectorPtr = new VPVectorPointerRecipe(
- Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
- I->getDebugLoc());
+ VPSingleDefRecipe *VectorPtr;
+ if (Reverse)
+ VectorPtr = new VPReverseVectorPointerRecipe(
+ Ptr, &Plan.getVF(), getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+ else
+ VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false,
+ I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 594492344d43ce..8df63b5796c739 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
+ case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1785,20 +1787,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
#endif
};
-/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<2> {
+ Type *IndexedTy;
+
+public:
+ VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+ bool IsInBounds, DebugLoc DL)
+ : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+ ArrayRef<VPValue *>({Ptr, VF}),
+ GEPFlagsTy(IsInBounds), DL),
+ IndexedTy(IndexedTy) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+ VPValue *getVFValue() { return getOperand(1); }
+ const VPValue *getVFValue() const { return getOperand(1); }
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ assert(getNumOperands() <= 2 && "must have at most two operands");
+ return true;
+ }
+
+ VPReverseVectorPointerRecipe *clone() override {
+ return new VPReverseVectorPointerRecipe(
+ getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the pointers for widened memory accesses of IndexTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
- bool IsReverse;
public:
- VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
- bool IsInBounds, DebugLoc DL)
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+ DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
- IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1819,8 +1865,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}
VPVectorPointerRecipe *clone() override {
- return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
- isInBounds(), getDebugLoc());
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+ getDebugLoc());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 277df0637372d8..7bfbcc3dff0e1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d092b9c10acc8..1b715a28604e33 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -154,6 +154,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPDerivedIVSC:
case VPPredInstPHISC:
case VPScalarCastSC:
+ case VPVectorPointerSC:
+ case VPReverseVectorPointerSC:
return false;
case VPInstructionSC:
return mayWriteToMemory();
@@ -1813,38 +1815,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPVectorPointerRecipe ::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- State.setDebugLocFrom(getDebugLoc());
- unsigned CurrentPart = getUnrollPart(*this);
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+ unsigned CurrentPart, IRBuilderBase &Builder) {
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
- ? DL.getIndexType(Builder.getPtrTy(0))
- : Builder.getInt32Ty();
+ return IsScalable && (IsReverse || CurrentPart > 0)
+ ? DL.getIndexType(Builder.getPtrTy(0))
+ : Builder.getInt32Ty();
+}
+
+void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
+ CurrentPart, Builder);
+
+ // The wide store needs to start at the last vector element.
+ Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+ if (IndexTy != RunTimeVF->getType())
+ RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+ // NumElt = -CurrentPart * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- Value *ResultPtr = nullptr;
- if (IsReverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -CurrentPart * RunTimeVF
- Value *NumElt = Builder.CreateMul(
- ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
- }
+ State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = reverse-vector-pointer ";
+ if (isInBounds())
+ O << "inbounds ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
+ CurrentPart, Builder);
+ Value *Ptr = State.get(getOperand(0), VPLane(0));
+ bool InBounds = isInBounds();
+
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -1855,8 +1882,9 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent;
printAsOperand(O, SlotTracker);
O << " = vector-pointer ";
- if (IsReverse)
- O << "(reverse) ";
+
+ if (isInBounds())
+ O << "inbounds ";
printOperands(O, SlotTracker);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..1e32865e8ee576 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorPointerRecipe>(Copy) ||
+ VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
m_VPValue())))
Copy->addOperand(getConstantVPV(Part));
- if (isa<VPVectorPointerRecipe>(R))
+ if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 4c383244f96f1a..d7626e437db338 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -346,6 +346,7 @@ class VPDef {
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
+ VPReverseVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 81121019efe767..76562e80fbc4a1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[N]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3
-; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP18]], align 8
-; CHECK-NEXT: [[TMP19:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3
-; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]]
-; CHECK-NEXT: store <vscale x 8 x double> [[TMP19]], ptr [[TMP25]], align 8
-; CHECK-NEXT: store <vscale x 8 x double> [[TMP20]], ptr [[TMP31]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
+; CHECK-NEXT: store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
+; CHECK-NEXT: store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
-; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00
+; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = cal...
[truncated]
|
@llvm/pr-subscribers-backend-risc-v Author: Shih-Po Hung (arcbbb) ChangesFollowing #110974, update VPVectorPointerRecipe as well. Patch is 102.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113410.diff 23 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6e082b1c134dee..220625908dff9e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
+ case VPDef::VPReverseVectorPointerSC:
case VPDef::VPExpandSCEVSC:
case VPDef::VPEVLBasedIVPHISC:
case VPDef::VPPredInstPHISC:
@@ -8160,9 +8161,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
if (Consecutive) {
auto *GEP = dyn_cast<GetElementPtrInst>(
Ptr->getUnderlyingValue()->stripPointerCasts());
- auto *VectorPtr = new VPVectorPointerRecipe(
- Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
- I->getDebugLoc());
+ VPSingleDefRecipe *VectorPtr;
+ if (Reverse)
+ VectorPtr = new VPReverseVectorPointerRecipe(
+ Ptr, &Plan.getVF(), getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+ else
+ VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+ GEP ? GEP->isInBounds() : false,
+ I->getDebugLoc());
Builder.getInsertBlock()->appendRecipe(VectorPtr);
Ptr = VectorPtr;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 594492344d43ce..8df63b5796c739 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -882,6 +882,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPReplicateSC:
case VPRecipeBase::VPScalarIVStepsSC:
case VPRecipeBase::VPVectorPointerSC:
+ case VPRecipeBase::VPReverseVectorPointerSC:
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
@@ -1078,6 +1079,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+ R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1785,20 +1787,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
#endif
};
-/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// A recipe to compute the pointers for widened memory accesses of IndexTy
+/// in reverse order.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+ public VPUnrollPartAccessor<2> {
+ Type *IndexedTy;
+
+public:
+ VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+ bool IsInBounds, DebugLoc DL)
+ : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+ ArrayRef<VPValue *>({Ptr, VF}),
+ GEPFlagsTy(IsInBounds), DL),
+ IndexedTy(IndexedTy) {}
+
+ VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+ VPValue *getVFValue() { return getOperand(1); }
+ const VPValue *getVFValue() const { return getOperand(1); }
+
+ void execute(VPTransformState &State) override;
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+ /// Returns true if the recipe only uses the first part of operand \p Op.
+ bool onlyFirstPartUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ assert(getNumOperands() <= 2 && "must have at most two operands");
+ return true;
+ }
+
+ VPReverseVectorPointerRecipe *clone() override {
+ return new VPReverseVectorPointerRecipe(
+ getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the pointers for widened memory accesses of IndexTy.
class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
Type *IndexedTy;
- bool IsReverse;
public:
- VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
- bool IsInBounds, DebugLoc DL)
+ VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+ DebugLoc DL)
: VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
GEPFlagsTy(IsInBounds), DL),
- IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+ IndexedTy(IndexedTy) {}
VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
@@ -1819,8 +1865,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
}
VPVectorPointerRecipe *clone() override {
- return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
- isInBounds(), getDebugLoc());
+ return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+ getDebugLoc());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 277df0637372d8..7bfbcc3dff0e1f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -261,9 +261,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
- VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
- return inferScalarType(R->getOperand(0));
- })
+ VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+ [this](const VPRecipeBase *R) {
+ return inferScalarType(R->getOperand(0));
+ })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0d092b9c10acc8..1b715a28604e33 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -154,6 +154,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPDerivedIVSC:
case VPPredInstPHISC:
case VPScalarCastSC:
+ case VPVectorPointerSC:
+ case VPReverseVectorPointerSC:
return false;
case VPInstructionSC:
return mayWriteToMemory();
@@ -1813,38 +1815,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPVectorPointerRecipe ::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- State.setDebugLocFrom(getDebugLoc());
- unsigned CurrentPart = getUnrollPart(*this);
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+ unsigned CurrentPart, IRBuilderBase &Builder) {
// Use i32 for the gep index type when the value is constant,
// or query DataLayout for a more suitable index type otherwise.
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
- Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
- ? DL.getIndexType(Builder.getPtrTy(0))
- : Builder.getInt32Ty();
+ return IsScalable && (IsReverse || CurrentPart > 0)
+ ? DL.getIndexType(Builder.getPtrTy(0))
+ : Builder.getInt32Ty();
+}
+
+void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
+ CurrentPart, Builder);
+
+ // The wide store needs to start at the last vector element.
+ Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+ if (IndexTy != RunTimeVF->getType())
+ RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+ // NumElt = -CurrentPart * RunTimeVF
+ Value *NumElt = Builder.CreateMul(
+ ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
Value *Ptr = State.get(getOperand(0), VPLane(0));
bool InBounds = isInBounds();
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+ ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- Value *ResultPtr = nullptr;
- if (IsReverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
- // NumElt = -CurrentPart * RunTimeVF
- Value *NumElt = Builder.CreateMul(
- ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane =
- Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
- ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
- } else {
- Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
- ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
- }
+ State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent;
+ printAsOperand(O, SlotTracker);
+ O << " = reverse-vector-pointer ";
+ if (isInBounds())
+ O << "inbounds ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
+ auto &Builder = State.Builder;
+ State.setDebugLocFrom(getDebugLoc());
+ unsigned CurrentPart = getUnrollPart(*this);
+ Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
+ CurrentPart, Builder);
+ Value *Ptr = State.get(getOperand(0), VPLane(0));
+ bool InBounds = isInBounds();
+
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+ Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
State.set(this, ResultPtr, /*IsScalar*/ true);
}
@@ -1855,8 +1882,9 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent;
printAsOperand(O, SlotTracker);
O << " = vector-pointer ";
- if (IsReverse)
- O << "(reverse) ";
+
+ if (isInBounds())
+ O << "inbounds ";
printOperands(O, SlotTracker);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef71..1e32865e8ee576 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
- VPVectorPointerRecipe>(Copy) ||
+ VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
m_VPValue())))
Copy->addOperand(getConstantVPV(Part));
- if (isa<VPVectorPointerRecipe>(R))
+ if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
Copy->setOperand(0, R.getOperand(0));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 4c383244f96f1a..d7626e437db338 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -346,6 +346,7 @@ class VPDef {
VPScalarCastSC,
VPScalarIVStepsSC,
VPVectorPointerSC,
+ VPReverseVectorPointerSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 81121019efe767..76562e80fbc4a1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1
-; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[N]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3
-; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP18]], align 8
-; CHECK-NEXT: [[TMP19:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3
-; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]]
-; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]]
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]]
-; CHECK-NEXT: store <vscale x 8 x double> [[TMP19]], ptr [[TMP25]], align 8
-; CHECK-NEXT: store <vscale x 8 x double> [[TMP20]], ptr [[TMP31]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1
+; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
+; CHECK-NEXT: store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
+; CHECK-NEXT: store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
-; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00
+; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: [[TMP7:%.*]] = cal...
[truncated]
|
@@ -4442,6 +4442,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, | |||
case VPDef::VPInstructionSC: | |||
case VPDef::VPCanonicalIVPHISC: | |||
case VPDef::VPVectorPointerSC: | |||
case VPDef::VPReverseVectorPointerSC: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be folded into #110974, as otherwise it may change behavior?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it is gone after rebase.
9777124
to
9a5ebc9
Compare
rebase after #110974 merges. |
This depends on #110974 to update VPVectorPointerRecipe.