Skip to content

Commit 8277288

Browse files
committed
[CostModel] Add a DstTy to getShuffleCost
A shuffle takes two input vectors and a mask, and produces a new vector of type <MaskElts x SrcEltTy>. Historically it has been assumed that the SrcTy and the DstTy are the same for getShuffleCost, although that assumption has been relaxed in recent years. If the Tp passed to getShuffleCost is the SrcTy, then the DstTy can be calculated from the number of Mask elements and the source element type, but the Mask is not always provided and the Tp is not always reliably the SrcTy. This has led to situations, notably in the SLP vectorizer but also in the generic cost routines, where assumptions about how vectors will be legalized are built into the generic cost routines - for example whether they will widen or promote, with the cost modelling assuming they will widen while the default lowering for integer vectors is to promote. This patch attempts to start improving that. It originally tried to alter more of the cost model, but that quickly became too many changes at once, so this patch just plumbs a DstTy into getShuffleCost so that DstTy and SrcTy can be reliably distinguished. The callers of getShuffleCost have been updated to try to pass a more accurate DstTy. Otherwise the patch tries to be largely non-functional, keeping SrcTy as the primary type used in the shuffle cost routines and only using DstTy where it was already used in the past (for InsertSubvector, for example). Some asserts have been added to help check for consistent values when both a Mask and a DstTy are provided to getShuffleCost. Some of them took a while to get right, and some non-mask calls might still be incorrect. Hopefully this will provide a useful base on which to build support for more shuffles that alter size.
1 parent 85f791d commit 8277288

25 files changed

+444
-348
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,19 +1381,21 @@ class TargetTransformInfo {
13811381
const SmallBitVector &OpcodeMask,
13821382
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
13831383

1384-
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1385-
/// The exact mask may be passed as Mask, or else the array will be empty.
1386-
/// The index and subtype parameters are used by the subvector insertion and
1387-
/// extraction shuffle kinds to show the insert/extract point and the type of
1388-
/// the subvector being inserted/extracted. The operands of the shuffle can be
1389-
/// passed through \p Args, which helps improve the cost estimation in some
1390-
/// cases, like in broadcast loads.
1391-
/// NOTE: For subvector extractions Tp represents the source type.
1392-
LLVM_ABI InstructionCost getShuffleCost(
1393-
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {},
1394-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
1395-
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = {},
1396-
const Instruction *CxtI = nullptr) const;
1384+
/// \return The cost of a shuffle instruction of kind Kind with inputs of type
1385+
/// SrcTy, producing a vector of type DstTy. The exact mask may be passed as
1386+
/// Mask, or else the array will be empty. The index and subtype parameters
1387+
/// are used by the subvector insertion and extraction shuffle kinds to show
1388+
/// the insert/extract point and the type of the subvector being
1389+
/// inserted/extracted. The operands of the shuffle can be passed through \p
1390+
/// Args, which helps improve the cost estimation in some cases, like in
1391+
/// broadcast loads.
1392+
LLVM_ABI InstructionCost
1393+
getShuffleCost(ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1394+
ArrayRef<int> Mask = {},
1395+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1396+
int Index = 0, VectorType *SubTp = nullptr,
1397+
ArrayRef<const Value *> Args = {},
1398+
const Instruction *CxtI = nullptr) const;
13971399

13981400
/// Represents a hint about the context in which a cast is used.
13991401
///

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -711,9 +711,9 @@ class TargetTransformInfoImplBase {
711711
}
712712

713713
virtual InstructionCost
714-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
715-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
716-
ArrayRef<const Value *> Args = {},
714+
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
715+
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
716+
VectorType *SubTp, ArrayRef<const Value *> Args = {},
717717
const Instruction *CxtI = nullptr) const {
718718
return 1;
719719
}
@@ -1545,13 +1545,14 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
15451545
return 0;
15461546

15471547
if (Shuffle->isExtractSubvectorMask(SubIndex))
1548-
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1549-
Mask, CostKind, SubIndex, VecTy,
1550-
Operands, Shuffle);
1548+
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
1549+
VecSrcTy, Mask, CostKind, SubIndex,
1550+
VecTy, Operands, Shuffle);
15511551

15521552
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
15531553
return TargetTTI->getShuffleCost(
1554-
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1554+
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
1555+
SubIndex,
15551556
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
15561557
Operands, Shuffle);
15571558

@@ -1580,62 +1581,69 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
15801581

15811582
return TargetTTI->getShuffleCost(
15821583
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1583-
AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1584+
VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
15841585
}
15851586

15861587
// Narrowing shuffle - perform shuffle at original wider width and
15871588
// then extract the lower elements.
1589+
// FIXME: This can assume widening, which is not true of all vector
1590+
// architectures (and is not even the default).
15881591
AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
15891592

15901593
InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
15911594
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1592-
VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1595+
VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
1596+
Shuffle);
15931597

15941598
SmallVector<int, 16> ExtractMask(Mask.size());
15951599
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
15961600
return ShuffleCost + TargetTTI->getShuffleCost(
1597-
TTI::SK_ExtractSubvector, VecSrcTy,
1601+
TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
15981602
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
15991603
}
16001604

16011605
if (Shuffle->isIdentity())
16021606
return 0;
16031607

16041608
if (Shuffle->isReverse())
1605-
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1606-
0, nullptr, Operands, Shuffle);
1609+
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
1610+
CostKind, 0, nullptr, Operands,
1611+
Shuffle);
16071612

16081613
if (Shuffle->isSelect())
1609-
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1610-
0, nullptr, Operands, Shuffle);
1614+
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
1615+
CostKind, 0, nullptr, Operands,
1616+
Shuffle);
16111617

16121618
if (Shuffle->isTranspose())
1613-
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1614-
CostKind, 0, nullptr, Operands,
1619+
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
1620+
Mask, CostKind, 0, nullptr, Operands,
16151621
Shuffle);
16161622

16171623
if (Shuffle->isZeroEltSplat())
1618-
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1619-
CostKind, 0, nullptr, Operands,
1624+
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
1625+
Mask, CostKind, 0, nullptr, Operands,
16201626
Shuffle);
16211627

16221628
if (Shuffle->isSingleSource())
1623-
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1624-
CostKind, 0, nullptr, Operands,
1625-
Shuffle);
1629+
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1630+
VecSrcTy, Mask, CostKind, 0, nullptr,
1631+
Operands, Shuffle);
16261632

16271633
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
16281634
return TargetTTI->getShuffleCost(
1629-
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1635+
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
16301636
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
16311637
Shuffle);
16321638

16331639
if (Shuffle->isSplice(SubIndex))
1634-
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1635-
SubIndex, nullptr, Operands, Shuffle);
1640+
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
1641+
CostKind, SubIndex, nullptr, Operands,
1642+
Shuffle);
16361643

1637-
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1638-
CostKind, 0, nullptr, Operands, Shuffle);
1644+
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
1645+
Mask, CostKind, 0, nullptr, Operands,
1646+
Shuffle);
16391647
}
16401648
case Instruction::ExtractElement: {
16411649
auto *EEI = dyn_cast<ExtractElementInst>(U);

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
329329
// Cost the call + mask.
330330
auto Cost =
331331
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
332-
if (VD->isMasked())
333-
Cost += thisT()->getShuffleCost(
334-
TargetTransformInfo::SK_Broadcast,
335-
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
336-
nullptr, {});
332+
if (VD->isMasked()) {
333+
auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
334+
Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
335+
VecTy, {}, CostKind, 0, nullptr, {});
336+
}
337337

338338
// Lowering to a library call (with output pointers) may require us to emit
339339
// reloads for the results.
@@ -1101,11 +1101,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11011101

11021102
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
11031103
ArrayRef<int> Mask,
1104-
VectorType *Ty, int &Index,
1104+
VectorType *SrcTy, int &Index,
11051105
VectorType *&SubTy) const {
11061106
if (Mask.empty())
11071107
return Kind;
1108-
int NumSrcElts = Ty->getElementCount().getKnownMinValue();
1108+
int NumSrcElts = SrcTy->getElementCount().getKnownMinValue();
11091109
switch (Kind) {
11101110
case TTI::SK_PermuteSingleSrc: {
11111111
if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
@@ -1116,7 +1116,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11161116
return TTI::SK_Broadcast;
11171117
if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
11181118
(Index + Mask.size()) <= (size_t)NumSrcElts) {
1119-
SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
1119+
SubTy = FixedVectorType::get(SrcTy->getElementType(), Mask.size());
11201120
return TTI::SK_ExtractSubvector;
11211121
}
11221122
break;
@@ -1127,7 +1127,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11271127
Mask, NumSrcElts, NumSubElts, Index)) {
11281128
if (Index + NumSubElts > NumSrcElts)
11291129
return Kind;
1130-
SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts);
1130+
SubTy = FixedVectorType::get(SrcTy->getElementType(), NumSubElts);
11311131
return TTI::SK_InsertSubvector;
11321132
}
11331133
if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts))
@@ -1151,13 +1151,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11511151
}
11521152

11531153
InstructionCost
1154-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
1155-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
1156-
ArrayRef<const Value *> Args = {},
1154+
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1155+
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
1156+
VectorType *SubTp, ArrayRef<const Value *> Args = {},
11571157
const Instruction *CxtI = nullptr) const override {
1158-
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
1158+
switch (improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp)) {
11591159
case TTI::SK_Broadcast:
1160-
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1160+
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
11611161
return getBroadcastShuffleOverhead(FVT, CostKind);
11621162
return InstructionCost::getInvalid();
11631163
case TTI::SK_Select:
@@ -1166,14 +1166,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11661166
case TTI::SK_Transpose:
11671167
case TTI::SK_PermuteSingleSrc:
11681168
case TTI::SK_PermuteTwoSrc:
1169-
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1169+
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
11701170
return getPermuteShuffleOverhead(FVT, CostKind);
11711171
return InstructionCost::getInvalid();
11721172
case TTI::SK_ExtractSubvector:
1173-
return getExtractSubvectorOverhead(Tp, CostKind, Index,
1173+
return getExtractSubvectorOverhead(SrcTy, CostKind, Index,
11741174
cast<FixedVectorType>(SubTp));
11751175
case TTI::SK_InsertSubvector:
1176-
return getInsertSubvectorOverhead(Tp, CostKind, Index,
1176+
return getInsertSubvectorOverhead(DstTy, CostKind, Index,
11771177
cast<FixedVectorType>(SubTp));
11781178
}
11791179
llvm_unreachable("Unknown TTI::ShuffleKind");
@@ -1910,6 +1910,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19101910
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
19111911
unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
19121912
return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1913+
cast<VectorType>(RetTy),
19131914
cast<VectorType>(Args[0]->getType()), {},
19141915
CostKind, Index, cast<VectorType>(RetTy));
19151916
}
@@ -1920,17 +1921,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19201921
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
19211922
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
19221923
return thisT()->getShuffleCost(
1923-
TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), {},
1924-
CostKind, Index, cast<VectorType>(Args[1]->getType()));
1924+
TTI::SK_InsertSubvector, cast<VectorType>(RetTy),
1925+
cast<VectorType>(Args[0]->getType()), {}, CostKind, Index,
1926+
cast<VectorType>(Args[1]->getType()));
19251927
}
19261928
case Intrinsic::vector_reverse: {
1927-
return thisT()->getShuffleCost(TTI::SK_Reverse,
1929+
return thisT()->getShuffleCost(TTI::SK_Reverse, cast<VectorType>(RetTy),
19281930
cast<VectorType>(Args[0]->getType()), {},
19291931
CostKind, 0, cast<VectorType>(RetTy));
19301932
}
19311933
case Intrinsic::vector_splice: {
19321934
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1933-
return thisT()->getShuffleCost(TTI::SK_Splice,
1935+
return thisT()->getShuffleCost(TTI::SK_Splice, cast<VectorType>(RetTy),
19341936
cast<VectorType>(Args[0]->getType()), {},
19351937
CostKind, Index, cast<VectorType>(RetTy));
19361938
}
@@ -2376,8 +2378,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23762378
CostKind, 1, nullptr, nullptr);
23772379
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
23782380
CostKind, 0, nullptr, nullptr);
2379-
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
2380-
CostKind, 0, nullptr);
2381+
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, SearchTy,
2382+
std::nullopt, CostKind, 0, nullptr);
23812383
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
23822384
CmpInst::ICMP_EQ, CostKind);
23832385
Cost +=
@@ -2961,8 +2963,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
29612963
while (NumVecElts > MVTLen) {
29622964
NumVecElts /= 2;
29632965
VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2964-
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
2965-
CostKind, NumVecElts, SubTy);
2966+
ShuffleCost += thisT()->getShuffleCost(
2967+
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
29662968
ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
29672969
Ty = SubTy;
29682970
++LongVectorCount;
@@ -2978,7 +2980,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
29782980
// By default reductions need one shuffle per reduction level.
29792981
ShuffleCost +=
29802982
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2981-
{}, CostKind, 0, Ty);
2983+
Ty, {}, CostKind, 0, Ty);
29822984
ArithCost +=
29832985
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
29842986
return ShuffleCost + ArithCost +
@@ -3052,8 +3054,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30523054
NumVecElts /= 2;
30533055
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
30543056

3055-
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
3056-
CostKind, NumVecElts, SubTy);
3057+
ShuffleCost += thisT()->getShuffleCost(
3058+
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
30573059

30583060
IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
30593061
MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
@@ -3069,7 +3071,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30693071
// architecture-dependent length.
30703072
ShuffleCost +=
30713073
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
3072-
{}, CostKind, 0, Ty);
3074+
Ty, {}, CostKind, 0, Ty);
30733075
IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
30743076
MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
30753077
// The last min/max should be in vector registers and we counted it above.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -984,11 +984,16 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
984984
}
985985

986986
InstructionCost TargetTransformInfo::getShuffleCost(
987-
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
987+
ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef<int> Mask,
988988
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
989989
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
990-
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
991-
Index, SubTp, Args, CxtI);
990+
assert((Mask.empty() || DstTy->isScalableTy() ||
991+
Mask.size() == DstTy->getElementCount().getKnownMinValue()) &&
992+
"Expected the Mask to match the return size if given");
993+
assert(SrcTy->getScalarType() == DstTy->getScalarType() &&
994+
"Expected the same scalar types");
995+
InstructionCost Cost = TTIImpl->getShuffleCost(
996+
Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp, Args, CxtI);
992997
assert(Cost >= 0 && "TTI should not produce negative costs!");
993998
return Cost;
994999
}

0 commit comments

Comments
 (0)