Skip to content

Commit d7824fa

Browse files
authored
TTI: Check legalization cost of abs nodes (#100523)
1 parent 3fffa6d commit d7824fa

File tree

5 files changed: 242 additions and 231 deletions.

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2120,20 +2120,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
21202120
case Intrinsic::vector_reduce_fminimum:
21212121
return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
21222122
VecOpTy, ICA.getFlags(), CostKind);
2123-
case Intrinsic::abs: {
2124-
// abs(X) = select(icmp(X,0),X,sub(0,X))
2125-
Type *CondTy = RetTy->getWithNewBitWidth(1);
2126-
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2127-
InstructionCost Cost = 0;
2128-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2129-
Pred, CostKind);
2130-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2131-
Pred, CostKind);
2132-
// TODO: Should we add an OperandValueProperties::OP_Zero property?
2133-
Cost += thisT()->getArithmeticInstrCost(
2134-
BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
2135-
return Cost;
2136-
}
2123+
case Intrinsic::abs:
2124+
ISD = ISD::ABS;
2125+
break;
21372126
case Intrinsic::smax:
21382127
ISD = ISD::SMAX;
21392128
break;
@@ -2402,6 +2391,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
24022391
Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
24032392
return Cost;
24042393
}
2394+
case Intrinsic::abs: {
2395+
// abs(X) = select(icmp(X,0),X,sub(0,X))
2396+
Type *CondTy = RetTy->getWithNewBitWidth(1);
2397+
CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
2398+
InstructionCost Cost = 0;
2399+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
2400+
Pred, CostKind);
2401+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
2402+
Pred, CostKind);
2403+
// TODO: Should we add an OperandValueProperties::OP_Zero property?
2404+
Cost += thisT()->getArithmeticInstrCost(
2405+
BinaryOperator::Sub, RetTy, CostKind,
2406+
{TTI::OK_UniformConstantValue, TTI::OP_None});
2407+
return Cost;
2408+
}
24052409
case Intrinsic::fptosi_sat:
24062410
case Intrinsic::fptoui_sat: {
24072411
if (Tys.empty())

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,7 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
696696
case Intrinsic::usub_sat:
697697
case Intrinsic::sadd_sat:
698698
case Intrinsic::ssub_sat:
699+
case Intrinsic::abs:
699700
return true;
700701
default:
701702
return false;
@@ -724,7 +725,7 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
724725
if (SLT == MVT::f64)
725726
return LT.first * NElts * get64BitInstrCost(CostKind);
726727

727-
if ((ST->has16BitInsts() && SLT == MVT::f16) ||
728+
if ((ST->has16BitInsts() && (SLT == MVT::f16 || SLT == MVT::i16)) ||
728729
(ST->hasPackedFP32Ops() && SLT == MVT::f32))
729730
NElts = (NElts + 1) / 2;
730731

@@ -752,11 +753,17 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
752753
case Intrinsic::usub_sat:
753754
case Intrinsic::sadd_sat:
754755
case Intrinsic::ssub_sat: {
756+
// TODO: Full rate for i32/i16
755757
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
756758
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
757759
NElts = 1;
758760
break;
759761
}
762+
case Intrinsic::abs:
763+
// Expansion takes 2 instructions for VALU
764+
if (SLT == MVT::i16 || SLT == MVT::i32)
765+
InstRate = 2 * getFullRateInstrCost();
766+
break;
760767
default:
761768
break;
762769
}

0 commit comments

Comments
 (0)