Skip to content

Commit a4aa6bc

Browse files
committed
[SLP]Fix PR106667: carefully look for operand nodes.
If the operand node has the same scalars as one of the vectorized nodes, the compiler could miss this and incorrectly request minbitwidth data for the wrong node. It may lead to a compiler crash, because the vectorized node might have different minbw result. Fixes #106667
1 parent 5500e21 commit a4aa6bc

File tree

2 files changed

+208
-169
lines changed

2 files changed

+208
-169
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 164 additions & 169 deletions
Original file line numberDiff line numberDiff line change
@@ -2864,6 +2864,12 @@ class BoUpSLP {
28642864
/// avoid issues with def-use order.
28652865
Value *vectorizeTree(TreeEntry *E, bool PostponedPHIs);
28662866

2867+
TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx);
2868+
const TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E,
2869+
unsigned NodeIdx) const {
2870+
return const_cast<BoUpSLP *>(this)->getMatchedVectorizedOperand(E, NodeIdx);
2871+
}
2872+
28672873
/// Vectorize a single entry in the tree, the \p Idx-th operand of the entry
28682874
/// \p E.
28692875
/// \param PostponedPHIs true, if need to postpone emission of phi nodes to
@@ -6964,6 +6970,55 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
69646970
}
69656971
}
69666972

6973+
// Check if this is a duplicate of another entry.
6974+
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
6975+
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
6976+
if (!E->isSame(VL)) {
6977+
auto It = MultiNodeScalars.find(S.OpValue);
6978+
if (It != MultiNodeScalars.end()) {
6979+
auto *TEIt = find_if(It->getSecond(),
6980+
[&](TreeEntry *ME) { return ME->isSame(VL); });
6981+
if (TEIt != It->getSecond().end())
6982+
E = *TEIt;
6983+
else
6984+
E = nullptr;
6985+
} else {
6986+
E = nullptr;
6987+
}
6988+
}
6989+
if (!E) {
6990+
if (!doesNotNeedToBeScheduled(S.OpValue)) {
6991+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
6992+
if (TryToFindDuplicates(S))
6993+
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
6994+
ReuseShuffleIndices);
6995+
return;
6996+
}
6997+
SmallPtrSet<const TreeEntry *, 4> Nodes;
6998+
Nodes.insert(getTreeEntry(S.OpValue));
6999+
for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue))
7000+
Nodes.insert(E);
7001+
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
7002+
if (any_of(Nodes, [&](const TreeEntry *E) {
7003+
return all_of(E->Scalars,
7004+
[&](Value *V) { return Values.contains(V); });
7005+
})) {
7006+
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
7007+
if (TryToFindDuplicates(S))
7008+
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
7009+
ReuseShuffleIndices);
7010+
return;
7011+
}
7012+
} else {
7013+
// Record the reuse of the tree node. FIXME, currently this is only used
7014+
// to properly draw the graph rather than for the actual vectorization.
7015+
E->UserTreeIndices.push_back(UserTreeIdx);
7016+
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
7017+
<< ".\n");
7018+
return;
7019+
}
7020+
}
7021+
69677022
// Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
69687023
// a load), in which case peek through to include it in the tree, without
69697024
// ballooning over-budget.
@@ -7095,55 +7150,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
70957150
// We now know that this is a vector of instructions of the same type from
70967151
// the same block.
70977152

7098-
// Check if this is a duplicate of another entry.
7099-
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
7100-
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
7101-
if (!E->isSame(VL)) {
7102-
auto It = MultiNodeScalars.find(S.OpValue);
7103-
if (It != MultiNodeScalars.end()) {
7104-
auto *TEIt = find_if(It->getSecond(),
7105-
[&](TreeEntry *ME) { return ME->isSame(VL); });
7106-
if (TEIt != It->getSecond().end())
7107-
E = *TEIt;
7108-
else
7109-
E = nullptr;
7110-
} else {
7111-
E = nullptr;
7112-
}
7113-
}
7114-
if (!E) {
7115-
if (!doesNotNeedToBeScheduled(S.OpValue)) {
7116-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
7117-
if (TryToFindDuplicates(S))
7118-
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
7119-
ReuseShuffleIndices);
7120-
return;
7121-
}
7122-
SmallPtrSet<const TreeEntry *, 4> Nodes;
7123-
Nodes.insert(getTreeEntry(S.OpValue));
7124-
for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue))
7125-
Nodes.insert(E);
7126-
SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
7127-
if (any_of(Nodes, [&](const TreeEntry *E) {
7128-
return all_of(E->Scalars,
7129-
[&](Value *V) { return Values.contains(V); });
7130-
})) {
7131-
LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
7132-
if (TryToFindDuplicates(S))
7133-
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
7134-
ReuseShuffleIndices);
7135-
return;
7136-
}
7137-
} else {
7138-
// Record the reuse of the tree node. FIXME, currently this is only used
7139-
// to properly draw the graph rather than for the actual vectorization.
7140-
E->UserTreeIndices.push_back(UserTreeIdx);
7141-
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
7142-
<< ".\n");
7143-
return;
7144-
}
7145-
}
7146-
71477153
// Check that none of the instructions in the bundle are already in the tree.
71487154
for (Value *V : VL) {
71497155
if ((!IsScatterVectorizeUserTE && !isa<Instruction>(V)) ||
@@ -9362,22 +9368,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
93629368

93639369
const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E,
93649370
unsigned Idx) const {
9365-
Value *Op = E->getOperand(Idx).front();
9366-
if (const TreeEntry *TE = getTreeEntry(Op)) {
9367-
if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
9368-
return EI.EdgeIdx == Idx && EI.UserTE == E;
9369-
}) != TE->UserTreeIndices.end())
9370-
return TE;
9371-
auto MIt = MultiNodeScalars.find(Op);
9372-
if (MIt != MultiNodeScalars.end()) {
9373-
for (const TreeEntry *TE : MIt->second) {
9374-
if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) {
9375-
return EI.EdgeIdx == Idx && EI.UserTE == E;
9376-
}) != TE->UserTreeIndices.end())
9377-
return TE;
9378-
}
9379-
}
9380-
}
9371+
if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx))
9372+
return VE;
93819373
const auto *It =
93829374
find_if(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
93839375
return TE->isGather() &&
@@ -12521,120 +12513,123 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1252112513
}
1252212514
};
1252312515

12524-
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
12525-
bool PostponedPHIs) {
12526-
ValueList &VL = E->getOperand(NodeIdx);
12527-
const unsigned VF = VL.size();
12516+
BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E,
12517+
unsigned NodeIdx) {
12518+
ArrayRef<Value *> VL = E->getOperand(NodeIdx);
1252812519
InstructionsState S = getSameOpcode(VL, *TLI);
1252912520
// Special processing for GEPs bundle, which may include non-gep values.
1253012521
if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
1253112522
const auto *It = find_if(VL, IsaPred<GetElementPtrInst>);
1253212523
if (It != VL.end())
1253312524
S = getSameOpcode(*It, *TLI);
1253412525
}
12535-
if (S.getOpcode()) {
12536-
auto CheckSameVE = [&](const TreeEntry *VE) {
12537-
return VE->isSame(VL) &&
12538-
(any_of(VE->UserTreeIndices,
12539-
[E, NodeIdx](const EdgeInfo &EI) {
12540-
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
12541-
}) ||
12542-
any_of(VectorizableTree,
12543-
[E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
12544-
return TE->isOperandGatherNode({E, NodeIdx}) &&
12545-
VE->isSame(TE->Scalars);
12546-
}));
12526+
if (!S.getOpcode())
12527+
return nullptr;
12528+
auto CheckSameVE = [&](const TreeEntry *VE) {
12529+
return VE->isSame(VL) &&
12530+
(any_of(VE->UserTreeIndices,
12531+
[E, NodeIdx](const EdgeInfo &EI) {
12532+
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
12533+
}) ||
12534+
any_of(VectorizableTree,
12535+
[E, NodeIdx, VE](const std::unique_ptr<TreeEntry> &TE) {
12536+
return TE->isOperandGatherNode(
12537+
{const_cast<TreeEntry *>(E), NodeIdx}) &&
12538+
VE->isSame(TE->Scalars);
12539+
}));
12540+
};
12541+
TreeEntry *VE = getTreeEntry(S.OpValue);
12542+
if (VE && CheckSameVE(VE))
12543+
return VE;
12544+
auto It = MultiNodeScalars.find(S.OpValue);
12545+
if (It != MultiNodeScalars.end()) {
12546+
auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
12547+
return TE != VE && CheckSameVE(TE);
12548+
});
12549+
if (I != It->getSecond().end())
12550+
return *I;
12551+
}
12552+
return nullptr;
12553+
}
12554+
12555+
Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
12556+
bool PostponedPHIs) {
12557+
ValueList &VL = E->getOperand(NodeIdx);
12558+
const unsigned VF = VL.size();
12559+
if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx)) {
12560+
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
12561+
// V may be affected by MinBWs.
12562+
// We want ShuffleInstructionBuilder to correctly support REVEC. The key
12563+
// factor is the number of elements, not their type.
12564+
Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
12565+
unsigned NumElements = getNumElements(VL.front()->getType());
12566+
ShuffleInstructionBuilder ShuffleBuilder(
12567+
NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
12568+
: ScalarTy,
12569+
Builder, *this);
12570+
ShuffleBuilder.add(V, Mask);
12571+
SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
12572+
E->CombinedEntriesWithIndices.size());
12573+
transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
12574+
[&](const auto &P) {
12575+
return std::make_pair(VectorizableTree[P.first].get(),
12576+
P.second);
12577+
});
12578+
return ShuffleBuilder.finalize(std::nullopt, SubVectors);
1254712579
};
12548-
TreeEntry *VE = getTreeEntry(S.OpValue);
12549-
bool IsSameVE = VE && CheckSameVE(VE);
12550-
if (!IsSameVE) {
12551-
auto It = MultiNodeScalars.find(S.OpValue);
12552-
if (It != MultiNodeScalars.end()) {
12553-
auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) {
12554-
return TE != VE && CheckSameVE(TE);
12555-
});
12556-
if (I != It->getSecond().end()) {
12557-
VE = *I;
12558-
IsSameVE = true;
12559-
}
12560-
}
12561-
}
12562-
if (IsSameVE) {
12563-
auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
12564-
// V may be affected by MinBWs.
12565-
// We want ShuffleInstructionBuilder to correctly support REVEC. The key
12566-
// factor is the number of elements, not their type.
12567-
Type *ScalarTy = cast<VectorType>(V->getType())->getElementType();
12568-
unsigned NumElements = getNumElements(VL.front()->getType());
12569-
ShuffleInstructionBuilder ShuffleBuilder(
12570-
NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements)
12571-
: ScalarTy,
12572-
Builder, *this);
12573-
ShuffleBuilder.add(V, Mask);
12574-
SmallVector<std::pair<const TreeEntry *, unsigned>> SubVectors(
12575-
E->CombinedEntriesWithIndices.size());
12576-
transform(E->CombinedEntriesWithIndices, SubVectors.begin(),
12577-
[&](const auto &P) {
12578-
return std::make_pair(VectorizableTree[P.first].get(),
12579-
P.second);
12580-
});
12581-
return ShuffleBuilder.finalize(std::nullopt, SubVectors);
12582-
};
12583-
Value *V = vectorizeTree(VE, PostponedPHIs);
12584-
if (VF * getNumElements(VL[0]->getType()) !=
12585-
cast<FixedVectorType>(V->getType())->getNumElements()) {
12586-
if (!VE->ReuseShuffleIndices.empty()) {
12587-
// Reshuffle to get only unique values.
12588-
// If some of the scalars are duplicated in the vectorization
12589-
// tree entry, we do not vectorize them but instead generate a
12590-
// mask for the reuses. But if there are several users of the
12591-
// same entry, they may have different vectorization factors.
12592-
// This is especially important for PHI nodes. In this case, we
12593-
// need to adapt the resulting instruction for the user
12594-
// vectorization factor and have to reshuffle it again to take
12595-
// only unique elements of the vector. Without this code the
12596-
// function incorrectly returns reduced vector instruction with
12597-
// the same elements, not with the unique ones.
12598-
12599-
// block:
12600-
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
12601-
// %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
12602-
// ... (use %2)
12603-
// %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
12604-
// br %block
12605-
SmallVector<int> Mask(VF, PoisonMaskElem);
12606-
for (auto [I, V] : enumerate(VL)) {
12607-
if (isa<PoisonValue>(V))
12608-
continue;
12609-
Mask[I] = VE->findLaneForValue(V);
12610-
}
12611-
V = FinalShuffle(V, Mask);
12612-
} else {
12613-
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
12614-
"Expected vectorization factor less "
12615-
"than original vector size.");
12616-
SmallVector<int> UniformMask(VF, 0);
12617-
std::iota(UniformMask.begin(), UniformMask.end(), 0);
12618-
V = FinalShuffle(V, UniformMask);
12580+
Value *V = vectorizeTree(VE, PostponedPHIs);
12581+
if (VF * getNumElements(VL[0]->getType()) !=
12582+
cast<FixedVectorType>(V->getType())->getNumElements()) {
12583+
if (!VE->ReuseShuffleIndices.empty()) {
12584+
// Reshuffle to get only unique values.
12585+
// If some of the scalars are duplicated in the vectorization
12586+
// tree entry, we do not vectorize them but instead generate a
12587+
// mask for the reuses. But if there are several users of the
12588+
// same entry, they may have different vectorization factors.
12589+
// This is especially important for PHI nodes. In this case, we
12590+
// need to adapt the resulting instruction for the user
12591+
// vectorization factor and have to reshuffle it again to take
12592+
// only unique elements of the vector. Without this code the
12593+
// function incorrectly returns reduced vector instruction with
12594+
// the same elements, not with the unique ones.
12595+
12596+
// block:
12597+
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
12598+
// %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
12599+
// ... (use %2)
12600+
// %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
12601+
// br %block
12602+
SmallVector<int> Mask(VF, PoisonMaskElem);
12603+
for (auto [I, V] : enumerate(VL)) {
12604+
if (isa<PoisonValue>(V))
12605+
continue;
12606+
Mask[I] = VE->findLaneForValue(V);
1261912607
}
12620-
}
12621-
// Need to update the operand gather node, if actually the operand is not a
12622-
// vectorized node, but the buildvector/gather node, which matches one of
12623-
// the vectorized nodes.
12624-
if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) {
12625-
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
12626-
}) == VE->UserTreeIndices.end()) {
12627-
auto *It = find_if(
12628-
VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
12629-
return TE->isGather() &&
12630-
TE->UserTreeIndices.front().UserTE == E &&
12631-
TE->UserTreeIndices.front().EdgeIdx == NodeIdx;
12632-
});
12633-
assert(It != VectorizableTree.end() && "Expected gather node operand.");
12634-
(*It)->VectorizedValue = V;
12635-
}
12636-
return V;
12608+
V = FinalShuffle(V, Mask);
12609+
} else {
12610+
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
12611+
"Expected vectorization factor less "
12612+
"than original vector size.");
12613+
SmallVector<int> UniformMask(VF, 0);
12614+
std::iota(UniformMask.begin(), UniformMask.end(), 0);
12615+
V = FinalShuffle(V, UniformMask);
12616+
}
12617+
}
12618+
// Need to update the operand gather node, if actually the operand is not a
12619+
// vectorized node, but the buildvector/gather node, which matches one of
12620+
// the vectorized nodes.
12621+
if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) {
12622+
return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
12623+
}) == VE->UserTreeIndices.end()) {
12624+
auto *It =
12625+
find_if(VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
12626+
return TE->isGather() && TE->UserTreeIndices.front().UserTE == E &&
12627+
TE->UserTreeIndices.front().EdgeIdx == NodeIdx;
12628+
});
12629+
assert(It != VectorizableTree.end() && "Expected gather node operand.");
12630+
(*It)->VectorizedValue = V;
1263712631
}
12632+
return V;
1263812633
}
1263912634

1264012635
// Find the corresponding gather entry and vectorize it.

0 commit comments

Comments
 (0)