Skip to content

Commit 36e8db7

Browse files
committed
[SLP][NFC]Extract main part of GetGEPCostDiff to a function, NFC.
1 parent e5638c5 commit 36e8db7

File tree

1 file changed

+80
-70
lines changed

1 file changed

+80
-70
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 80 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -6954,6 +6954,82 @@ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
69546954
return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
69556955
}
69566956

6957+
/// Calculate the scalar and the vector costs from vectorizing a set of GEPs.
///
/// \param TTI target cost model queried for the pointer-chain and GEP costs.
/// \param Ptrs pointer operands whose address computations are being costed.
/// \param BasePtr pointer passed as the chain base to the cost queries; in the
/// load/store case it is always counted as retained in the vector code.
/// \param Opcode Instruction::Load or Instruction::Store selects the wide
/// unit-stride load/store case; any other opcode selects the gather case.
/// \param CostKind cost kind forwarded to every TTI query.
/// \param ScalarTy access type passed to the scalar pointer-chain cost query.
/// \param VecTy access type passed to the vector cost queries.
/// \returns the pair {ScalarCost, VecCost}. In the load/store case, if every
/// pointer in \p Ptrs is retained in the vector code, both elements are
/// TTI::TCC_Free.
6958+
static std::pair<InstructionCost, InstructionCost>
6959+
getGEPCosts(const TargetTransformInfo &TTI, ArrayRef<Value *> Ptrs,
6960+
Value *BasePtr, unsigned Opcode, TTI::TargetCostKind CostKind,
6961+
Type *ScalarTy, VectorType *VecTy) {
6962+
InstructionCost ScalarCost = 0;
6963+
InstructionCost VecCost = 0;
6964+
// Here we differentiate two cases: (1) when Ptrs represent a regular
6965+
// vectorization tree node (as they are pointer arguments of scattered
6966+
// loads) or (2) when Ptrs are the arguments of loads or stores being
6967+
// vectorized as plain wide unit-stride load/store since all the
6968+
// loads/stores are known to be from/to adjacent locations.
6969+
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
6970+
// Case 2: estimate costs for pointer related costs when vectorizing to
6971+
// a wide load/store.
6972+
// Scalar cost is estimated as a set of pointers with known relationship
6973+
// between them.
6974+
// For vector code we will use BasePtr as argument for the wide load/store
6975+
// but we also need to account for all the instructions which are going to
6976+
// stay in vectorized code due to uses outside of these scalar
6977+
// loads/stores.
6978+
ScalarCost = TTI.getPointersChainCost(
6979+
Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
6980+
CostKind);
6981+
6982+
SmallVector<const Value *> PtrsRetainedInVecCode;
6983+
for (Value *V : Ptrs) {
6984+
if (V == BasePtr) {
6985+
PtrsRetainedInVecCode.push_back(V);
6986+
continue;
6987+
}
6988+
auto *Ptr = dyn_cast<GetElementPtrInst>(V);
6989+
// For simplicity assume Ptr to stay in vectorized code if it's not a
6990+
// GEP instruction. We don't care since its cost is considered free.
6991+
// TODO: We should check for any uses outside of vectorizable tree
6992+
// rather than just single use.
6993+
if (!Ptr || !Ptr->hasOneUse())
6994+
PtrsRetainedInVecCode.push_back(V);
6995+
}
6996+
6997+
if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
6998+
// If all pointers stay in vectorized code then we don't have
6999+
// any savings on that.
7000+
return std::make_pair(TTI::TCC_Free, TTI::TCC_Free);
7001+
}
7002+
VecCost = TTI.getPointersChainCost(PtrsRetainedInVecCode, BasePtr,
7003+
TTI::PointersChainInfo::getKnownStride(),
7004+
VecTy, CostKind);
7005+
} else {
7006+
// Case 1: Ptrs are the arguments of loads that we are going to transform
7007+
// into masked gather load intrinsic.
7008+
// All the scalar GEPs will be removed as a result of vectorization.
7009+
// For any external uses of some lanes extract element instructions will
7010+
// be generated (whose cost is estimated separately).
// The stride is treated as unknown only when every pointer is a GEP
// instruction that does not have all-constant indices; otherwise it is
// treated as known.
7011+
TTI::PointersChainInfo PtrsInfo =
7012+
all_of(Ptrs,
7013+
[](const Value *V) {
7014+
auto *Ptr = dyn_cast<GetElementPtrInst>(V);
7015+
return Ptr && !Ptr->hasAllConstantIndices();
7016+
})
7017+
? TTI::PointersChainInfo::getUnknownStride()
7018+
: TTI::PointersChainInfo::getKnownStride();
7019+
7020+
ScalarCost =
7021+
TTI.getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy, CostKind);
// Only the base GEP is costed for the vector code; if BasePtr is not a GEP
// operator, VecCost keeps its initial value of 0.
7022+
if (auto *BaseGEP = dyn_cast<GEPOperator>(BasePtr)) {
7023+
SmallVector<const Value *> Indices(BaseGEP->indices());
7024+
VecCost = TTI.getGEPCost(BaseGEP->getSourceElementType(),
7025+
BaseGEP->getPointerOperand(), Indices, VecTy,
7026+
CostKind);
7027+
}
7028+
}
7029+
7030+
return std::make_pair(ScalarCost, VecCost);
7031+
}
7032+
69577033
/// Merges shuffle masks and emits final shuffle instruction, if required. It
69587034
/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
69597035
/// when the actual shuffle instruction is generated only if this is actually
@@ -7917,78 +7993,12 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
79177993
// Calculate cost difference from vectorizing set of GEPs.
79187994
// Negative value means vectorizing is profitable.
79197995
auto GetGEPCostDiff = [=](ArrayRef<Value *> Ptrs, Value *BasePtr) {
7920-
InstructionCost ScalarCost = 0;
7921-
InstructionCost VecCost = 0;
7922-
// Here we differentiate two cases: (1) when Ptrs represent a regular
7923-
// vectorization tree node (as they are pointer arguments of scattered
7924-
// loads) or (2) when Ptrs are the arguments of loads or stores being
7925-
// vectorized as plane wide unit-stride load/store since all the
7926-
// loads/stores are known to be from/to adjacent locations.
79277996
assert(E->State == TreeEntry::Vectorize &&
79287997
"Entry state expected to be Vectorize here.");
7929-
if (isa<LoadInst, StoreInst>(VL0)) {
7930-
// Case 2: estimate costs for pointer related costs when vectorizing to
7931-
// a wide load/store.
7932-
// Scalar cost is estimated as a set of pointers with known relationship
7933-
// between them.
7934-
// For vector code we will use BasePtr as argument for the wide load/store
7935-
// but we also need to account all the instructions which are going to
7936-
// stay in vectorized code due to uses outside of these scalar
7937-
// loads/stores.
7938-
ScalarCost = TTI->getPointersChainCost(
7939-
Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
7940-
CostKind);
7941-
7942-
SmallVector<const Value *> PtrsRetainedInVecCode;
7943-
for (Value *V : Ptrs) {
7944-
if (V == BasePtr) {
7945-
PtrsRetainedInVecCode.push_back(V);
7946-
continue;
7947-
}
7948-
auto *Ptr = dyn_cast<GetElementPtrInst>(V);
7949-
// For simplicity assume Ptr to stay in vectorized code if it's not a
7950-
// GEP instruction. We don't care since it's cost considered free.
7951-
// TODO: We should check for any uses outside of vectorizable tree
7952-
// rather than just single use.
7953-
if (!Ptr || !Ptr->hasOneUse())
7954-
PtrsRetainedInVecCode.push_back(V);
7955-
}
7956-
7957-
if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
7958-
// If all pointers stay in vectorized code then we don't have
7959-
// any savings on that.
7960-
LLVM_DEBUG(dumpTreeCosts(E, 0, ScalarCost, ScalarCost,
7961-
"Calculated GEPs cost for Tree"));
7962-
return InstructionCost{TTI::TCC_Free};
7963-
}
7964-
VecCost = TTI->getPointersChainCost(
7965-
PtrsRetainedInVecCode, BasePtr,
7966-
TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
7967-
} else {
7968-
// Case 1: Ptrs are the arguments of loads that we are going to transform
7969-
// into masked gather load intrinsic.
7970-
// All the scalar GEPs will be removed as a result of vectorization.
7971-
// For any external uses of some lanes extract element instructions will
7972-
// be generated (which cost is estimated separately).
7973-
TTI::PointersChainInfo PtrsInfo =
7974-
all_of(Ptrs,
7975-
[](const Value *V) {
7976-
auto *Ptr = dyn_cast<GetElementPtrInst>(V);
7977-
return Ptr && !Ptr->hasAllConstantIndices();
7978-
})
7979-
? TTI::PointersChainInfo::getUnknownStride()
7980-
: TTI::PointersChainInfo::getKnownStride();
7981-
7982-
ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
7983-
CostKind);
7984-
if (auto *BaseGEP = dyn_cast<GEPOperator>(BasePtr)) {
7985-
SmallVector<const Value *> Indices(BaseGEP->indices());
7986-
VecCost = TTI->getGEPCost(BaseGEP->getSourceElementType(),
7987-
BaseGEP->getPointerOperand(), Indices, VecTy,
7988-
CostKind);
7989-
}
7990-
}
7991-
7998+
InstructionCost ScalarCost = 0;
7999+
InstructionCost VecCost = 0;
8000+
std::tie(ScalarCost, VecCost) = getGEPCosts(
8001+
*TTI, Ptrs, BasePtr, E->getOpcode(), CostKind, ScalarTy, VecTy);
79928002
LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost,
79938003
"Calculated GEPs cost for Tree"));
79948004

0 commit comments

Comments
 (0)