-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Allow VPWidenPHI in non-native path. NFC #118662
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Allow VPWidenPHI in non-native path. NFC #118662
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Luke Lau (lukel97) ChangesWe can reuse VPWidenPHI in #118638, but it requires us to allow it in the non-native path. We also need to propagate the DebugLoc and use a different name in the generated PHI, so this splits these parts off in case we want it. We lose some debug info in dbg-outer-loop-vect.ll, but I think this is because the underlying phi node didn't have a DebugLoc to begin with. I think the current version is just carrying over the DebugLoc from the previous state. Full diff: https://github.com/llvm/llvm-project/pull/118662.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c7c044a042719..33d50de947aa61 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -511,7 +511,7 @@ class InnerLoopVectorizer {
VPTransformState &State);
/// Fix the non-induction PHIs in \p Plan.
- void fixNonInductionPHIs(VPTransformState &State);
+ void fixWidenedPHIs(VPTransformState &State);
/// Create a new phi node for the induction variable \p OrigPhi to resume
/// iteration count in the scalar epilogue, from where the vectorized loop
@@ -2931,9 +2931,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
}
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
- // Fix widened non-induction PHIs by setting up the PHI operands.
- if (EnableVPlanNativePath)
- fixNonInductionPHIs(State);
+ // Fix widened PHIs by setting up the PHI operands.
+ fixWidenedPHIs(State);
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
@@ -3070,7 +3069,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
-void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
+void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &P : VPBB->phis()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e1d828f038f9a2..e759cafb391b7c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2285,10 +2285,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
/// List of incoming blocks. Only used in the VPlan native path.
SmallVector<VPBasicBlock *, 2> IncomingBlocks;
+ /// Name to use for the generated IR instruction for the widened IV.
+ std::string Name;
+
public:
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+ VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
+ const Twine &Name = "vec.phi")
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
+ Phi->getDebugLoc()),
+ Name(Name.str()) {
if (Start)
addOperand(Start);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ef5f6e22f82206..829ae7c80e0578 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3417,12 +3417,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
- assert(EnableVPlanNativePath &&
- "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
+ State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
- Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
+ Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
State.set(this, VecPhi);
}
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 66aceab9fb27c8..44afa34100c299 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
; CHECK: for.cond5.preheader1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]
|
@llvm/pr-subscribers-vectorizers Author: Luke Lau (lukel97) ChangesWe can reuse VPWidenPHI in #118638, but it requires us to allow it in the non-native path. We also need to propagate the DebugLoc and use a different name in the generated PHI, so this splits these parts off in case we want it. We lose some debug info in dbg-outer-loop-vect.ll, but I think this is because the underlying phi node didn't have a DebugLoc to begin with. I think the current version is just carrying over the DebugLoc from the previous state. Full diff: https://github.com/llvm/llvm-project/pull/118662.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3c7c044a042719..33d50de947aa61 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -511,7 +511,7 @@ class InnerLoopVectorizer {
VPTransformState &State);
/// Fix the non-induction PHIs in \p Plan.
- void fixNonInductionPHIs(VPTransformState &State);
+ void fixWidenedPHIs(VPTransformState &State);
/// Create a new phi node for the induction variable \p OrigPhi to resume
/// iteration count in the scalar epilogue, from where the vectorized loop
@@ -2931,9 +2931,8 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
}
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
- // Fix widened non-induction PHIs by setting up the PHI operands.
- if (EnableVPlanNativePath)
- fixNonInductionPHIs(State);
+ // Fix widened PHIs by setting up the PHI operands.
+ fixWidenedPHIs(State);
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
@@ -3070,7 +3069,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
} while (Changed);
}
-void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
+void InnerLoopVectorizer::fixWidenedPHIs(VPTransformState &State) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &P : VPBB->phis()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e1d828f038f9a2..e759cafb391b7c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2285,10 +2285,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe {
/// List of incoming blocks. Only used in the VPlan native path.
SmallVector<VPBasicBlock *, 2> IncomingBlocks;
+ /// Name to use for the generated IR instruction for the widened IV.
+ std::string Name;
+
public:
/// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
- VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
- : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
+ VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
+ const Twine &Name = "vec.phi")
+ : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi,
+ Phi->getDebugLoc()),
+ Name(Name.str()) {
if (Start)
addOperand(Start);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ef5f6e22f82206..829ae7c80e0578 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3417,12 +3417,10 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
- assert(EnableVPlanNativePath &&
- "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
+ State.setDebugLocFrom(getDebugLoc());
Value *Op0 = State.get(getOperand(0));
Type *VecTy = Op0->getType();
- Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
+ Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, Name);
State.set(this, VecPhi);
}
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 66aceab9fb27c8..44afa34100c299 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ]
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]]
; CHECK: for.cond5.preheader1:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> splat (i1 true)), !dbg [[DBG22]]
|
@@ -511,7 +511,7 @@ class InnerLoopVectorizer { | |||
VPTransformState &State); | |||
|
|||
/// Fix the non-induction PHIs in \p Plan. | |||
void fixNonInductionPHIs(VPTransformState &State); | |||
void fixWidenedPHIs(VPTransformState &State); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I forgot to mention, I renamed this bit due to the plan to use it for induction phis in #118638, happy to take it out if preferred!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think renaming better to move to a separate NFC patch
@@ -2285,10 +2285,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { | |||
/// List of incoming blocks. Only used in the VPlan native path. | |||
SmallVector<VPBasicBlock *, 2> IncomingBlocks; | |||
|
|||
/// Name to use for the generated IR instruction for the widened IV. | |||
std::string Name; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe use SmallString or Twine?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std::string seems to be what everything else in VPlan.h uses, and it doesn't look like Twine can be stored
@@ -15,8 +15,8 @@ define void @foo(ptr %h) !dbg !4 { | |||
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[FOR_COND_CLEANUP32:%.*]] ] | |||
; CHECK-NEXT: br label [[FOR_COND5_PREHEADER1:%.*]], !dbg [[DBG21]] | |||
; CHECK: for.cond5.preheader1: | |||
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_BODY]] ], [ [[TMP4:%.*]], [[FOR_COND5_PREHEADER1]] ], !dbg [[DBG21]] | |||
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why did we lose debug info here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The original input phi and gep never had any debug info to begin with. So I think it was accidentally copying over the debug info from the branch instruction. I.e. the VPTransformState's debug loc wasn't being cleared when moving between recipes
@@ -2285,10 +2285,16 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe { | |||
/// List of incoming blocks. Only used in the VPlan native path. | |||
SmallVector<VPBasicBlock *, 2> IncomingBlocks; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe update the comments above re: usage in vplan native? Given this is now used in both paths, do these need delete or clarified?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IncomingBlocks should still only be used in the vplan native path, so I've added more comments to the other methods that should only be called from it
We can reuse VPWidenPHI in llvm#118638, but it requires us to allow it in the non-native path. We also need to propagate the DebugLoc and use a different name in the generated PHI, so this splits these parts off in case we want it. We lose some debug info in dbg-outer-loop-vect.ll, but I think this is because the underlying phi node didn't have a DebugLoc to begin with. I think the current version is just carrying over the DebugLoc from the previous state.
…native path, also add back the assertion but check that the incomingblocks is empty
f0f50a5
to
46ae402
Compare
I've removed the DebugLoc changes from this to let #120338 handle it instead, so this should now be NFC |
We can reuse VPWidenPHI in #118638, but it requires us to allow it in the non-native path. We also need to use a different name in the generated PHI, so this allows it to be changed