Skip to content

[RISCV][TTI] Reduce cost of a build_vector pattern #108419

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,40 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}

static unsigned isM1OrSmaller(MVT VT) {
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
return (LMUL == RISCVII::VLMUL::LMUL_F8 || LMUL == RISCVII::VLMUL::LMUL_F4 ||
LMUL == RISCVII::VLMUL::LMUL_F2 || LMUL == RISCVII::VLMUL::LMUL_1);
}

InstructionCost RISCVTTIImpl::getScalarizationOverhead(
VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
TTI::TargetCostKind CostKind) {
if (isa<ScalableVectorType>(Ty))
return InstructionCost::getInvalid();

// A build_vector (which is m1 sized or smaller) can be done in no
// worse than one vslide1down.vx per element in the type. We could
// in theory do an explode_vector in the inverse manner, but our
// lowering today does not have a first class node for this pattern.
InstructionCost Cost = BaseT::getScalarizationOverhead(
Ty, DemandedElts, Insert, Extract, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
if (Insert && !Extract && LT.first.isValid() && LT.second.isVector() &&
Ty->getScalarSizeInBits() != 1) {
assert(LT.second.isFixedLengthVector());
MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
if (isM1OrSmaller(ContainerVT)) {
InstructionCost BV =
cast<FixedVectorType>(Ty)->getNumElements() *
getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
if (BV < Cost)
Cost = BV;
}
}
return Cost;
}

InstructionCost
RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
Expand Down Expand Up @@ -767,6 +801,23 @@ InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
return NumLoads * MemOpCost;
}

InstructionCost
RISCVTTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
// FIXME: This is a property of the default vector convention, not
// all possible calling conventions. Fixing that will require
// some TTI API and SLP rework.
InstructionCost Cost = 0;
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
for (auto *Ty : Tys) {
if (!Ty->isVectorTy())
continue;
Align A = DL.getPrefTypeAlign(Ty);
Cost += getMemoryOpCost(Instruction::Store, Ty, A, 0, CostKind) +
getMemoryOpCost(Instruction::Load, Ty, A, 0, CostKind);
}
return Cost;
}

// Currently, these represent both throughput and codesize costs
// for the respective intrinsics. The costs in this table are simply
// instruction counts with the following adjustments made:
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,11 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr);

InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract,
TTI::TargetCostKind CostKind);

InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);

Expand All @@ -169,6 +174,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
TTI::TargetCostKind CostKind,
const Instruction *I);

InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Analysis/CostModel/RISCV/arith-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,8 @@ define void @frem() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = frem <1 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = frem <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F32 = frem <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = frem <2 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = frem <4 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> undef, undef
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F32 = frem <vscale x 1 x float> undef, undef
Expand All @@ -371,7 +371,7 @@ define void @frem() {
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV8F32 = frem <vscale x 8 x float> undef, undef
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV16F32 = frem <vscale x 16 x float> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = frem <1 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F64 = frem <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef
; CHECK-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F64 = frem <8 x double> undef, undef
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %NXV1F64 = frem <vscale x 1 x double> undef, undef
Expand Down Expand Up @@ -412,9 +412,9 @@ define void @frem_f16() {
; ZVFH-LABEL: 'frem_f16'
; ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F16 = frem <2 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F16 = frem <4 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F16 = frem <8 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef
; ZVFH-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef
; ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef
Expand All @@ -428,9 +428,9 @@ define void @frem_f16() {
; ZVFHMIN-LABEL: 'frem_f16'
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F16 = frem half undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F16 = frem <1 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F16 = frem <2 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F16 = frem <4 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F16 = frem <8 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F16 = frem <2 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F16 = frem <4 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F16 = frem <8 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F16 = frem <16 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V32F16 = frem <32 x half> undef, undef
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NXV1F16 = frem <vscale x 1 x half> undef, undef
Expand Down Expand Up @@ -620,9 +620,9 @@ define void @fcopysign_f16() {
; ZVFHMIN-LABEL: 'fcopysign_f16'
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F16 = call half @llvm.copysign.f16(half undef, half undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1F16 = call <1 x half> @llvm.copysign.v1f16(<1 x half> undef, <1 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F16 = call <2 x half> @llvm.copysign.v2f16(<2 x half> undef, <2 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F16 = call <4 x half> @llvm.copysign.v4f16(<4 x half> undef, <4 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F16 = call <8 x half> @llvm.copysign.v8f16(<8 x half> undef, <8 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F16 = call <16 x half> @llvm.copysign.v16f16(<16 x half> undef, <16 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32F16 = call <32 x half> @llvm.copysign.v32f16(<32 x half> undef, <32 x half> undef)
; ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1F16 = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> undef, <vscale x 1 x half> undef)
Expand Down
Loading
Loading