@@ -329,11 +329,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
329
329
// Cost the call + mask.
330
330
auto Cost =
331
331
thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
332
- if (VD->isMasked ())
333
- Cost += thisT ()-> getShuffleCost (
334
- TargetTransformInfo::SK_Broadcast,
335
- VectorType::get ( IntegerType::getInt1Ty (Ctx), VF) , {}, CostKind, 0 ,
336
- nullptr , {});
332
+ if (VD->isMasked ()) {
333
+ auto VecTy = VectorType::get ( IntegerType::getInt1Ty (Ctx), VF);
334
+ Cost += thisT ()-> getShuffleCost ( TargetTransformInfo::SK_Broadcast, VecTy ,
335
+ VecTy , {}, CostKind, 0 , nullptr , {});
336
+ }
337
337
338
338
// Lowering to a library call (with output pointers) may require us to emit
339
339
// reloads for the results.
@@ -1101,11 +1101,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1101
1101
1102
1102
TTI::ShuffleKind improveShuffleKindFromMask (TTI::ShuffleKind Kind,
1103
1103
ArrayRef<int > Mask,
1104
- VectorType *Ty , int &Index,
1104
+ VectorType *SrcTy , int &Index,
1105
1105
VectorType *&SubTy) const {
1106
1106
if (Mask.empty ())
1107
1107
return Kind;
1108
- int NumSrcElts = Ty ->getElementCount ().getKnownMinValue ();
1108
+ int NumSrcElts = SrcTy ->getElementCount ().getKnownMinValue ();
1109
1109
switch (Kind) {
1110
1110
case TTI::SK_PermuteSingleSrc: {
1111
1111
if (ShuffleVectorInst::isReverseMask (Mask, NumSrcElts))
@@ -1116,7 +1116,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1116
1116
return TTI::SK_Broadcast;
1117
1117
if (ShuffleVectorInst::isExtractSubvectorMask (Mask, NumSrcElts, Index) &&
1118
1118
(Index + Mask.size ()) <= (size_t )NumSrcElts) {
1119
- SubTy = FixedVectorType::get (Ty ->getElementType (), Mask.size ());
1119
+ SubTy = FixedVectorType::get (SrcTy ->getElementType (), Mask.size ());
1120
1120
return TTI::SK_ExtractSubvector;
1121
1121
}
1122
1122
break ;
@@ -1127,7 +1127,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1127
1127
Mask, NumSrcElts, NumSubElts, Index)) {
1128
1128
if (Index + NumSubElts > NumSrcElts)
1129
1129
return Kind;
1130
- SubTy = FixedVectorType::get (Ty ->getElementType (), NumSubElts);
1130
+ SubTy = FixedVectorType::get (SrcTy ->getElementType (), NumSubElts);
1131
1131
return TTI::SK_InsertSubvector;
1132
1132
}
1133
1133
if (ShuffleVectorInst::isSelectMask (Mask, NumSrcElts))
@@ -1151,13 +1151,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1151
1151
}
1152
1152
1153
1153
InstructionCost
1154
- getShuffleCost (TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask ,
1155
- TTI::TargetCostKind CostKind, int Index, VectorType *SubTp ,
1156
- ArrayRef<const Value *> Args = {},
1154
+ getShuffleCost (TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy ,
1155
+ ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index,
1156
+ VectorType *SubTp, ArrayRef<const Value *> Args = {},
1157
1157
const Instruction *CxtI = nullptr ) const override {
1158
- switch (improveShuffleKindFromMask (Kind, Mask, Tp , Index, SubTp)) {
1158
+ switch (improveShuffleKindFromMask (Kind, Mask, SrcTy , Index, SubTp)) {
1159
1159
case TTI::SK_Broadcast:
1160
- if (auto *FVT = dyn_cast<FixedVectorType>(Tp ))
1160
+ if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy ))
1161
1161
return getBroadcastShuffleOverhead (FVT, CostKind);
1162
1162
return InstructionCost::getInvalid ();
1163
1163
case TTI::SK_Select:
@@ -1166,14 +1166,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1166
1166
case TTI::SK_Transpose:
1167
1167
case TTI::SK_PermuteSingleSrc:
1168
1168
case TTI::SK_PermuteTwoSrc:
1169
- if (auto *FVT = dyn_cast<FixedVectorType>(Tp ))
1169
+ if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy ))
1170
1170
return getPermuteShuffleOverhead (FVT, CostKind);
1171
1171
return InstructionCost::getInvalid ();
1172
1172
case TTI::SK_ExtractSubvector:
1173
- return getExtractSubvectorOverhead (Tp , CostKind, Index,
1173
+ return getExtractSubvectorOverhead (SrcTy , CostKind, Index,
1174
1174
cast<FixedVectorType>(SubTp));
1175
1175
case TTI::SK_InsertSubvector:
1176
- return getInsertSubvectorOverhead (Tp , CostKind, Index,
1176
+ return getInsertSubvectorOverhead (DstTy , CostKind, Index,
1177
1177
cast<FixedVectorType>(SubTp));
1178
1178
}
1179
1179
llvm_unreachable (" Unknown TTI::ShuffleKind" );
@@ -1910,6 +1910,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1910
1910
return BaseT::getIntrinsicInstrCost (ICA, CostKind);
1911
1911
unsigned Index = cast<ConstantInt>(Args[1 ])->getZExtValue ();
1912
1912
return thisT ()->getShuffleCost (TTI::SK_ExtractSubvector,
1913
+ cast<VectorType>(RetTy),
1913
1914
cast<VectorType>(Args[0 ]->getType ()), {},
1914
1915
CostKind, Index, cast<VectorType>(RetTy));
1915
1916
}
@@ -1920,17 +1921,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1920
1921
return BaseT::getIntrinsicInstrCost (ICA, CostKind);
1921
1922
unsigned Index = cast<ConstantInt>(Args[2 ])->getZExtValue ();
1922
1923
return thisT ()->getShuffleCost (
1923
- TTI::SK_InsertSubvector, cast<VectorType>(Args[0 ]->getType ()), {},
1924
- CostKind, Index, cast<VectorType>(Args[1 ]->getType ()));
1924
+ TTI::SK_InsertSubvector, cast<VectorType>(RetTy),
1925
+ cast<VectorType>(Args[0 ]->getType ()), {}, CostKind, Index,
1926
+ cast<VectorType>(Args[1 ]->getType ()));
1925
1927
}
1926
1928
case Intrinsic::vector_reverse: {
1927
- return thisT ()->getShuffleCost (TTI::SK_Reverse,
1929
+ return thisT ()->getShuffleCost (TTI::SK_Reverse, cast<VectorType>(RetTy),
1928
1930
cast<VectorType>(Args[0 ]->getType ()), {},
1929
1931
CostKind, 0 , cast<VectorType>(RetTy));
1930
1932
}
1931
1933
case Intrinsic::vector_splice: {
1932
1934
unsigned Index = cast<ConstantInt>(Args[2 ])->getZExtValue ();
1933
- return thisT ()->getShuffleCost (TTI::SK_Splice,
1935
+ return thisT ()->getShuffleCost (TTI::SK_Splice, cast<VectorType>(RetTy),
1934
1936
cast<VectorType>(Args[0 ]->getType ()), {},
1935
1937
CostKind, Index, cast<VectorType>(RetTy));
1936
1938
}
@@ -2376,8 +2378,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2376
2378
CostKind, 1 , nullptr , nullptr );
2377
2379
Cost += thisT ()->getVectorInstrCost (Instruction::InsertElement, SearchTy,
2378
2380
CostKind, 0 , nullptr , nullptr );
2379
- Cost += thisT ()->getShuffleCost (TTI::SK_Broadcast, SearchTy, std::nullopt ,
2380
- CostKind, 0 , nullptr );
2381
+ Cost += thisT ()->getShuffleCost (TTI::SK_Broadcast, SearchTy, SearchTy ,
2382
+ std::nullopt, CostKind, 0 , nullptr );
2381
2383
Cost += thisT ()->getCmpSelInstrCost (BinaryOperator::ICmp, SearchTy, RetTy,
2382
2384
CmpInst::ICMP_EQ, CostKind);
2383
2385
Cost +=
@@ -2961,8 +2963,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2961
2963
while (NumVecElts > MVTLen) {
2962
2964
NumVecElts /= 2 ;
2963
2965
VectorType *SubTy = FixedVectorType::get (ScalarTy, NumVecElts);
2964
- ShuffleCost += thisT ()->getShuffleCost (TTI::SK_ExtractSubvector, Ty, {},
2965
- CostKind, NumVecElts, SubTy);
2966
+ ShuffleCost += thisT ()->getShuffleCost (
2967
+ TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
2966
2968
ArithCost += thisT ()->getArithmeticInstrCost (Opcode, SubTy, CostKind);
2967
2969
Ty = SubTy;
2968
2970
++LongVectorCount;
@@ -2978,7 +2980,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2978
2980
// By default reductions need one shuffle per reduction level.
2979
2981
ShuffleCost +=
2980
2982
NumReduxLevels * thisT ()->getShuffleCost (TTI::SK_PermuteSingleSrc, Ty,
2981
- {}, CostKind, 0 , Ty);
2983
+ Ty, {}, CostKind, 0 , Ty);
2982
2984
ArithCost +=
2983
2985
NumReduxLevels * thisT ()->getArithmeticInstrCost (Opcode, Ty, CostKind);
2984
2986
return ShuffleCost + ArithCost +
@@ -3052,8 +3054,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
3052
3054
NumVecElts /= 2 ;
3053
3055
auto *SubTy = FixedVectorType::get (ScalarTy, NumVecElts);
3054
3056
3055
- ShuffleCost += thisT ()->getShuffleCost (TTI::SK_ExtractSubvector, Ty, {},
3056
- CostKind, NumVecElts, SubTy);
3057
+ ShuffleCost += thisT ()->getShuffleCost (
3058
+ TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
3057
3059
3058
3060
IntrinsicCostAttributes Attrs (IID, SubTy, {SubTy, SubTy}, FMF);
3059
3061
MinMaxCost += getIntrinsicInstrCost (Attrs, CostKind);
@@ -3069,7 +3071,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
3069
3071
// architecture-dependent length.
3070
3072
ShuffleCost +=
3071
3073
NumReduxLevels * thisT ()->getShuffleCost (TTI::SK_PermuteSingleSrc, Ty,
3072
- {}, CostKind, 0 , Ty);
3074
+ Ty, {}, CostKind, 0 , Ty);
3073
3075
IntrinsicCostAttributes Attrs (IID, Ty, {Ty, Ty}, FMF);
3074
3076
MinMaxCost += NumReduxLevels * getIntrinsicInstrCost (Attrs, CostKind);
3075
3077
// The last min/max should be in vector registers and we counted it above.
0 commit comments