@@ -9036,41 +9036,45 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
9036
9036
"Expected only single user of the gather node.");
9037
9037
// TODO: currently checking only for Scalars in the tree entry, need to count
9038
9038
// reused elements too for better cost estimation.
9039
- Instruction &UserInst =
9040
- getLastInstructionInBundle(TE->UserTreeIndices.front() .UserTE);
9041
- BasicBlock *ParentBB = nullptr;
9039
+ const EdgeInfo &TEUseEI = TE->UserTreeIndices.front();
9040
+ const Instruction *TEInsertPt = & getLastInstructionInBundle(TEUseEI .UserTE);
9041
+ const BasicBlock *TEInsertBlock = nullptr;
9042
9042
// Main node of PHI entries keeps the correct order of operands/incoming
9043
9043
// blocks.
9044
- if (auto *PHI =
9045
- dyn_cast<PHINode>(TE->UserTreeIndices.front().UserTE->getMainOp())) {
9046
- ParentBB = PHI->getIncomingBlock(TE->UserTreeIndices.front().EdgeIdx);
9044
+ if (auto *PHI = dyn_cast<PHINode>(TEUseEI.UserTE->getMainOp())) {
9045
+ TEInsertBlock = PHI->getIncomingBlock(TEUseEI.EdgeIdx);
9047
9046
} else {
9048
- ParentBB = UserInst. getParent();
9047
+ TEInsertBlock = TEInsertPt-> getParent();
9049
9048
}
9050
- auto *NodeUI = DT->getNode(ParentBB );
9049
+ auto *NodeUI = DT->getNode(TEInsertBlock );
9051
9050
assert(NodeUI && "Should only process reachable instructions");
9052
9051
SmallPtrSet<Value *, 4> GatheredScalars(VL.begin(), VL.end());
9053
- auto CheckOrdering = [&](Instruction *LastEI) {
9054
- // Check if the user node of the TE comes after user node of EntryPtr,
9055
- // otherwise EntryPtr depends on TE.
9056
- // Gather nodes usually are not scheduled and inserted before their first
9057
- // user node. So, instead of checking dependency between the gather nodes
9058
- // themselves, we check the dependency between their user nodes.
9059
- // If one user node comes before the second one, we cannot use the second
9060
- // gather node as the source vector for the first gather node, because in
9061
- // the list of instructions it will be emitted later.
9062
- auto *EntryParent = LastEI->getParent();
9063
- auto *NodeEUI = DT->getNode(EntryParent);
9052
+ auto CheckOrdering = [&](const Instruction *InsertPt) {
9053
+ // Argument InsertPt is an instruction where vector code for some other
9054
+ // tree entry (one that shares one or more scalars with TE) is going to be
9055
+ // generated. This lambda returns true if insertion point of vector code
9056
+ // for the TE dominates that point (otherwise dependency is the other way
9057
+ // around). The other node is not limited to be of a gather kind. Gather
9058
+ // nodes are not scheduled and their vector code is inserted before their
9059
+ // first user. If user is PHI, that is supposed to be at the end of a
9060
+ // predecessor block. Otherwise it is the last instruction among scalars of
9061
+ // the user node. So, instead of checking dependency between instructions
9062
+ // themselves, we check dependency between their insertion points for vector
9063
+ // code (since each scalar instruction ends up as a lane of a vector
9064
+ // instruction).
9065
+ const BasicBlock *InsertBlock = InsertPt->getParent();
9066
+ auto *NodeEUI = DT->getNode(InsertBlock);
9064
9067
if (!NodeEUI)
9065
9068
return false;
9066
9069
assert((NodeUI == NodeEUI) ==
9067
9070
(NodeUI->getDFSNumIn() == NodeEUI->getDFSNumIn()) &&
9068
9071
"Different nodes should have different DFS numbers");
9069
9072
// Check the order of the gather nodes users.
9070
- if (UserInst. getParent() != EntryParent &&
9073
+ if (TEInsertPt-> getParent() != InsertBlock &&
9071
9074
(DT->dominates(NodeUI, NodeEUI) || !DT->dominates(NodeEUI, NodeUI)))
9072
9075
return false;
9073
- if (UserInst.getParent() == EntryParent && UserInst.comesBefore(LastEI))
9076
+ if (TEInsertPt->getParent() == InsertBlock &&
9077
+ TEInsertPt->comesBefore(InsertPt))
9074
9078
return false;
9075
9079
return true;
9076
9080
};
@@ -9095,49 +9099,35 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
9095
9099
[&](Value *V) { return GatheredScalars.contains(V); }) &&
9096
9100
"Must contain at least single gathered value.");
9097
9101
assert(TEPtr->UserTreeIndices.size() == 1 &&
9098
- "Expected only single user of the gather node.");
9099
- PHINode *EntryPHI =
9100
- dyn_cast<PHINode>(TEPtr->UserTreeIndices.front().UserTE->getMainOp());
9101
- Instruction *EntryUserInst =
9102
- EntryPHI ? nullptr
9103
- : &getLastInstructionInBundle(
9104
- TEPtr->UserTreeIndices.front().UserTE);
9105
- if (&UserInst == EntryUserInst) {
9106
- assert(!EntryPHI && "Unexpected phi node entry.");
9107
- // If 2 gathers are operands of the same entry, compare operands
9108
- // indices, use the earlier one as the base.
9109
- if (TE->UserTreeIndices.front().UserTE ==
9110
- TEPtr->UserTreeIndices.front().UserTE &&
9111
- TE->UserTreeIndices.front().EdgeIdx <
9112
- TEPtr->UserTreeIndices.front().EdgeIdx)
9102
+ "Expected only single user of a gather node.");
9103
+ const EdgeInfo &UseEI = TEPtr->UserTreeIndices.front();
9104
+
9105
+ PHINode *UserPHI = dyn_cast<PHINode>(UseEI.UserTE->getMainOp());
9106
+ const Instruction *InsertPt =
9107
+ UserPHI ? UserPHI->getIncomingBlock(UseEI.EdgeIdx)->getTerminator()
9108
+ : &getLastInstructionInBundle(UseEI.UserTE);
9109
+ if (!UserPHI && TEInsertPt == InsertPt) {
9110
+ // If 2 gathers are operands of the same non-PHI entry,
9111
+ // compare operands indices, use the earlier one as the base.
9112
+ if (TEUseEI.UserTE == UseEI.UserTE && TEUseEI.EdgeIdx < UseEI.EdgeIdx)
9113
9113
continue;
9114
9114
// If the user instruction is used for some reason in different
9115
9115
// vectorized nodes - make it depend on index.
9116
- if (TE->UserTreeIndices.front().UserTE !=
9117
- TEPtr->UserTreeIndices.front().UserTE &&
9118
- TE->Idx < TEPtr->Idx)
9116
+ if (TEUseEI.UserTE != UseEI.UserTE && TE->Idx < TEPtr->Idx)
9119
9117
continue;
9120
9118
}
9121
- // Check if the user node of the TE comes after user node of EntryPtr,
9122
- // otherwise EntryPtr depends on TE.
9123
- auto *EntryI =
9124
- EntryPHI
9125
- ? EntryPHI
9126
- ->getIncomingBlock(TEPtr->UserTreeIndices.front().EdgeIdx)
9127
- ->getTerminator()
9128
- : EntryUserInst;
9129
- if ((ParentBB != EntryI->getParent() ||
9130
- TE->UserTreeIndices.front().EdgeIdx <
9131
- TEPtr->UserTreeIndices.front().EdgeIdx ||
9132
- TE->UserTreeIndices.front().UserTE !=
9133
- TEPtr->UserTreeIndices.front().UserTE) &&
9134
- !CheckOrdering(EntryI))
9119
+
9120
+ // Check if the user node of the TE comes after user node of TEPtr,
9121
+ // otherwise TEPtr depends on TE.
9122
+ if ((TEInsertBlock != InsertPt->getParent() ||
9123
+ TEUseEI.EdgeIdx < UseEI.EdgeIdx || TEUseEI.UserTE != UseEI.UserTE) &&
9124
+ !CheckOrdering(InsertPt))
9135
9125
continue;
9136
9126
VToTEs.insert(TEPtr);
9137
9127
}
9138
9128
if (const TreeEntry *VTE = getTreeEntry(V)) {
9139
- Instruction &EntryUserInst = getLastInstructionInBundle(VTE);
9140
- if (&EntryUserInst == &UserInst || !CheckOrdering(&EntryUserInst ))
9129
+ Instruction &LastBundleInst = getLastInstructionInBundle(VTE);
9130
+ if (&LastBundleInst == TEInsertPt || !CheckOrdering(&LastBundleInst ))
9141
9131
continue;
9142
9132
VToTEs.insert(VTE);
9143
9133
}
0 commit comments