Skip to content

Commit 3e75dec

Browse files
authored
[RISCV][CostModel] Add getRISCVInstructionCost() to TTI for CostKind (#73651)
Instruction cost for CodeSize and Latency/RecipThroughput can be very different. Considering the diversity of CostKind and vendor-specific cost, and how they are spread across various TTI functions, it's becoming quite a challenge to handle. This patch adds an interface getRISCVInstructionCost to address it.
1 parent 9807305 commit 3e75dec

13 files changed

+725
-16
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2711,11 +2711,19 @@ InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
27112711
return getLMULCost(VT);
27122712
}
27132713

2714-
/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2714+
/// Return the cost of a vslidedown.vx or vslideup.vx instruction
27152715
/// for the type VT. (This does not cover the vslide1up or vslide1down
27162716
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
27172717
/// or may track the vrgather.vv cost. It is implementation-dependent.
2718-
InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
2718+
InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
  // The .vx slide is charged exactly the LMUL cost of VT.
  const InstructionCost SlideCost = getLMULCost(VT);
  return SlideCost;
}
2721+
2722+
/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2723+
/// for the type VT. (This does not cover the vslide1up or vslide1down
2724+
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2725+
/// or may track the vrgather.vv cost. It is implementation-dependent.
2726+
InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
  // The .vi slide is charged exactly the LMUL cost of VT.
  const InstructionCost SlideCost = getLMULCost(VT);
  return SlideCost;
}
27212729

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,8 @@ class RISCVTargetLowering : public TargetLowering {
526526

527527
InstructionCost getVRGatherVVCost(MVT VT) const;
528528
InstructionCost getVRGatherVICost(MVT VT) const;
529-
InstructionCost getVSlideCost(MVT VT) const;
529+
InstructionCost getVSlideVXCost(MVT VT) const;
530+
InstructionCost getVSlideVICost(MVT VT) const;
530531

531532
// Provide custom lowering hooks for some operations.
532533
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 103 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,65 @@ static cl::opt<unsigned> SLPMaxVF(
3434
"exclusively by SLP vectorizer."),
3535
cl::Hidden);
3636

37+
/// Return the combined cost of the RISC-V instruction sequence \p OpCodes when
/// operating on the legalized type \p VT, for the requested \p CostKind.
///
///  * TCK_CodeSize: every opcode counts as one, so the result is the number
///    of opcodes.
///  * Any kind other than TCK_RecipThroughput / TCK_Latency: each opcode is
///    charged the LMUL cost of \p VT.
///  * TCK_RecipThroughput / TCK_Latency: opcodes are costed individually;
///    gathers and slides use the dedicated TLI hooks, reductions depend on
///    the estimated VL, and everything else is charged the LMUL cost.
InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
                                      TTI::TargetCostKind CostKind) {
  size_t InstrCount = OpCodes.size();
  if (CostKind == TTI::TCK_CodeSize)
    return InstrCount;

  InstructionCost PerInstrLMULCost = TLI->getLMULCost(VT);
  const bool IsTimingCost =
      CostKind == TTI::TCK_RecipThroughput || CostKind == TTI::TCK_Latency;
  if (!IsTimingCost)
    return PerInstrLMULCost * InstrCount;

  // Element count used to cost reductions: the minimum element count of VT,
  // scaled by the tuning vscale when VT is not a fixed-length vector.
  // NOTE(review): dereferences getVScaleForTuning() unconditionally, so this
  // assumes it holds a value whenever VT is scalable -- confirm.
  auto EstimateVL = [&]() -> unsigned {
    unsigned NumElts = VT.getVectorMinNumElements();
    if (!VT.isFixedLengthVector())
      NumElts *= *getVScaleForTuning();
    return NumElts;
  };

  InstructionCost Total = 0;
  for (unsigned Opcode : OpCodes) {
    switch (Opcode) {
    case RISCV::VRGATHER_VI:
      Total += TLI->getVRGatherVICost(VT);
      break;
    case RISCV::VRGATHER_VV:
      Total += TLI->getVRGatherVVCost(VT);
      break;
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
      Total += TLI->getVSlideVICost(VT);
      break;
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
      Total += TLI->getVSlideVXCost(VT);
      break;
    // These reductions are charged ceil(log2(VL)).
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS:
      Total += Log2_32_Ceil(EstimateVL());
      break;
    // VFREDOSUM_VS is charged the full estimated VL.
    case RISCV::VFREDOSUM_VS:
      Total += EstimateVL();
      break;
    case RISCV::VMV_S_X:
      // FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
    default:
      Total += PerInstrLMULCost;
    }
  }
  return Total;
}
95+
3796
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
3897
TTI::TargetCostKind CostKind) {
3998
assert(Ty->isIntegerTy() &&
@@ -281,7 +340,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
281340
// Example sequence:
282341
// vnsrl.wi v10, v8, 0
283342
if (equal(DeinterleaveMask, Mask))
284-
return LT.first * TLI->getLMULCost(LT.second);
343+
return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
344+
LT.second, CostKind);
285345
}
286346
}
287347
}
@@ -292,7 +352,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
292352
LT.second.getVectorNumElements() <= 256)) {
293353
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
294354
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
295-
return IndexCost + TLI->getVRGatherVVCost(LT.second);
355+
return IndexCost +
356+
getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
296357
}
297358
[[fallthrough]];
298359
}
@@ -310,7 +371,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
310371
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
311372
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
312373
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
313-
return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
374+
return 2 * IndexCost +
375+
getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
376+
LT.second, CostKind) +
377+
MaskCost;
314378
}
315379
[[fallthrough]];
316380
}
@@ -365,19 +429,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
365429
// Example sequence:
366430
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
367431
// vslidedown.vi v8, v9, 2
368-
return LT.first * TLI->getVSlideCost(LT.second);
432+
return LT.first *
433+
getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
369434
case TTI::SK_InsertSubvector:
370435
// Example sequence:
371436
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
372437
// vslideup.vi v8, v9, 2
373-
return LT.first * TLI->getVSlideCost(LT.second);
438+
return LT.first *
439+
getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
374440
case TTI::SK_Select: {
375441
// Example sequence:
376442
// li a0, 90
377443
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
378444
// vmv.s.x v0, a0
379445
// vmerge.vvm v8, v9, v8, v0
380-
return LT.first * 3 * TLI->getLMULCost(LT.second);
446+
return LT.first *
447+
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
448+
getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
449+
LT.second, CostKind));
381450
}
382451
case TTI::SK_Broadcast: {
383452
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -389,7 +458,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
389458
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
390459
// vmv.v.x v8, a0
391460
// vmsne.vi v0, v8, 0
392-
return LT.first * TLI->getLMULCost(LT.second) * 3;
461+
return LT.first *
462+
(TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
463+
getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
464+
LT.second, CostKind));
393465
}
394466
// Example sequence:
395467
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -400,24 +472,40 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
400472
// vmv.v.x v8, a0
401473
// vmsne.vi v0, v8, 0
402474

403-
return LT.first * TLI->getLMULCost(LT.second) * 6;
475+
return LT.first *
476+
(TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
477+
TLI->getLMULCost(
478+
LT.second) + // FIXME: vmv.x.s is the same as extractelement
479+
getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
480+
RISCV::VMV_V_X, RISCV::VMSNE_VI},
481+
LT.second, CostKind));
404482
}
405483

406484
if (HasScalar) {
407485
// Example sequence:
408486
// vmv.v.x v8, a0
409-
return LT.first * TLI->getLMULCost(LT.second);
487+
return LT.first *
488+
getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
410489
}
411490

412491
// Example sequence:
413492
// vrgather.vi v9, v8, 0
414-
return LT.first * TLI->getVRGatherVICost(LT.second);
493+
return LT.first *
494+
getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
415495
}
416-
case TTI::SK_Splice:
496+
case TTI::SK_Splice: {
  // vslidedown+vslideup.
  // TODO: Multiplying by LT.first implies this legalizes into multiple copies
  // of similar code, but I think we expand through memory.
  //
  // The opcode pair lives in a local array rather than an ArrayRef assigned
  // from a braced list: ArrayRef does not own its elements, so the temporary
  // backing a braced list would be destroyed at the end of the assignment and
  // the cost query below would read dangling storage.
  //
  // Default to the register-offset (.vx) forms, and switch one of the two
  // slides to its immediate (.vi) form when Index is within (-32, 32).
  unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
  if (Index >= 0 && Index < 32)
    Opcodes[0] = RISCV::VSLIDEDOWN_VI;
  else if (Index < 0 && Index > -32)
    Opcodes[1] = RISCV::VSLIDEUP_VI;
  return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
421509
case TTI::SK_Reverse: {
422510
// TODO: Cases to improve here:
423511
// * Illegal vector types
@@ -437,7 +525,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
437525
if (LT.second.isFixedLengthVector())
438526
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
439527
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
440-
InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
528+
// FIXME: replace the constant `2` below with cost of {VID_V,VRSUB_VX}
529+
InstructionCost GatherCost =
530+
2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
441531
// Mask operation additionally required extend and truncate
442532
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
443533
return LT.first * (LenCost + GatherCost + ExtendCost);

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
4848
/// actual target hardware.
4949
unsigned getEstimatedVLFor(VectorType *Ty);
5050

51+
InstructionCost getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
52+
TTI::TargetCostKind CostKind);
53+
5154
/// Return the cost of accessing a constant pool entry of the specified
5255
/// type.
5356
InstructionCost getConstantPoolLoadCost(Type *Ty,

0 commit comments

Comments
 (0)