Skip to content

Commit 463c9f4

Browse files
committed
[RISCV] Move slide and gather costing to TLI [NFC] (PR #65396)
As mentioned in the TODOs from D159332. This PR doesn't actually common up that remaining copy of the code, because doing so would not be NFC due to DLEN. Fixing that will be a future PR.
1 parent dbd548d commit 463c9f4

File tree

4 files changed

+68
-66
lines changed

4 files changed

+68
-66
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2509,6 +2509,51 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
25092509
return false;
25102510
}
25112511

2512+
/// Return an estimated reciprocal-throughput cost for VT, expressed in
/// multiples of the LMUL_1 cost (assumed to be 1 here; the true value is
/// implementation-defined). Non-vector types have no LMUL cost.
InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
  if (!VT.isVector())
    return InstructionCost::getInvalid();

  const unsigned DLenFactor = Subtarget.getDLenFactor();

  // Fixed-length vectors: count how many DLEN-wide chunks are needed to
  // cover the type at the minimum VLEN.
  if (!VT.isScalableVector())
    return divideCeil(VT.getSizeInBits(),
                      Subtarget.getRealMinVLen() / DLenFactor);

  // Scalable vectors: derive the cost from the decoded LMUL.
  auto [LMul, Fractional] =
      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
  if (Fractional)
    return LMul <= DLenFactor ? DLenFactor / LMul : 1;
  return LMul * DLenFactor;
}
2533+
2534+
2535+
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2536+
/// is generally quadratic in the number of vreg implied by LMUL. Note that
2537+
/// operand (index and possibly mask) are handled separately.
2538+
InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2539+
return getLMULCost(VT) * getLMULCost(VT);
2540+
}
2541+
2542+
/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2543+
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2544+
/// or may track the vrgather.vv cost. It is implementation-dependent.
2545+
InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2546+
return getLMULCost(VT);
2547+
}
2548+
2549+
/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2550+
/// for the type VT. (This does not cover the vslide1up or vslide1down
2551+
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2552+
/// or may track the vrgather.vv cost. It is implementation-dependent.
2553+
InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
2554+
return getLMULCost(VT);
2555+
}
2556+
25122557
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
25132558
const RISCVSubtarget &Subtarget) {
25142559
// RISC-V FP-to-int conversions saturate to the destination register size, but

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <optional>
2323

2424
namespace llvm {
25+
class InstructionCost;
2526
class RISCVSubtarget;
2627
struct RISCVRegisterInfo;
2728
namespace RISCVISD {
@@ -520,6 +521,13 @@ class RISCVTargetLowering : public TargetLowering {
520521
shouldExpandBuildVectorWithShuffles(EVT VT,
521522
unsigned DefinedValues) const override;
522523

524+
/// Return the cost of LMUL for linear operations.
525+
InstructionCost getLMULCost(MVT VT) const;
526+
527+
InstructionCost getVRGatherVVCost(MVT VT) const;
528+
InstructionCost getVRGatherVICost(MVT VT) const;
529+
InstructionCost getVSlideCost(MVT VT) const;
530+
523531
// Provide custom lowering hooks for some operations.
524532
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
525533
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 15 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -34,28 +34,6 @@ static cl::opt<unsigned> SLPMaxVF(
3434
"exclusively by SLP vectorizer."),
3535
cl::Hidden);
3636

37-
InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) const {
38-
// TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
39-
// implementation-defined.
40-
if (!VT.isVector())
41-
return InstructionCost::getInvalid();
42-
unsigned DLenFactor = ST->getDLenFactor();
43-
unsigned Cost;
44-
if (VT.isScalableVector()) {
45-
unsigned LMul;
46-
bool Fractional;
47-
std::tie(LMul, Fractional) =
48-
RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
49-
if (Fractional)
50-
Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
51-
else
52-
Cost = (LMul * DLenFactor);
53-
} else {
54-
Cost = divideCeil(VT.getSizeInBits(), ST->getRealMinVLen() / DLenFactor);
55-
}
56-
return Cost;
57-
}
58-
5937
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
6038
TTI::TargetCostKind CostKind) {
6139
assert(Ty->isIntegerTy() &&
@@ -263,28 +241,6 @@ static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
263241
return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
264242
}
265243

266-
/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
267-
/// is generally quadratic in the number of vreg implied by LMUL. Note that
268-
/// operand (index and possibly mask) are handled separately.
269-
InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) const {
270-
return getLMULCost(VT) * getLMULCost(VT);
271-
}
272-
273-
/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
274-
/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
275-
/// or may track the vrgather.vv cost. It is implementation-dependent.
276-
InstructionCost RISCVTTIImpl::getVRGatherVICost(MVT VT) const {
277-
return getLMULCost(VT);
278-
}
279-
280-
/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
281-
/// for the type VT. (This does not cover the vslide1up or vslide1down
282-
/// variants.) Slides may be linear in the number of vregs implied by LMUL,
283-
/// or may track the vrgather.vv cost. It is implementation-dependent.
284-
InstructionCost RISCVTTIImpl::getVSlideCost(MVT VT) const {
285-
return getLMULCost(VT);
286-
}
287-
288244
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
289245
VectorType *Tp, ArrayRef<int> Mask,
290246
TTI::TargetCostKind CostKind,
@@ -314,14 +270,14 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
314270
// li a0, -1 (ignored)
315271
// vwmaccu.vx v10, a0, v9
316272
if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
317-
return 2 * LT.first * getLMULCost(LT.second);
273+
return 2 * LT.first * TLI->getLMULCost(LT.second);
318274

319275
if (Mask[0] == 0 || Mask[0] == 1) {
320276
auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
321277
// Example sequence:
322278
// vnsrl.wi v10, v8, 0
323279
if (equal(DeinterleaveMask, Mask))
324-
return LT.first * getLMULCost(LT.second);
280+
return LT.first * TLI->getLMULCost(LT.second);
325281
}
326282
}
327283
}
@@ -332,7 +288,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
332288
LT.second.getVectorNumElements() <= 256)) {
333289
VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
334290
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
335-
return IndexCost + getVRGatherVVCost(LT.second);
291+
return IndexCost + TLI->getVRGatherVVCost(LT.second);
336292
}
337293
[[fallthrough]];
338294
}
@@ -350,7 +306,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
350306
VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
351307
InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
352308
InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
353-
return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
309+
return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
354310
}
355311
[[fallthrough]];
356312
}
@@ -402,19 +358,19 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
402358
// Example sequence:
403359
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
404360
// vslidedown.vi v8, v9, 2
405-
return LT.first * getVSlideCost(LT.second);
361+
return LT.first * TLI->getVSlideCost(LT.second);
406362
case TTI::SK_InsertSubvector:
407363
// Example sequence:
408364
// vsetivli zero, 4, e8, mf2, tu, ma (ignored)
409365
// vslideup.vi v8, v9, 2
410-
return LT.first * getVSlideCost(LT.second);
366+
return LT.first * TLI->getVSlideCost(LT.second);
411367
case TTI::SK_Select: {
412368
// Example sequence:
413369
// li a0, 90
414370
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
415371
// vmv.s.x v0, a0
416372
// vmerge.vvm v8, v9, v8, v0
417-
return LT.first * 3 * getLMULCost(LT.second);
373+
return LT.first * 3 * TLI->getLMULCost(LT.second);
418374
}
419375
case TTI::SK_Broadcast: {
420376
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -426,7 +382,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
426382
// vsetivli zero, 2, e8, mf8, ta, ma (ignored)
427383
// vmv.v.x v8, a0
428384
// vmsne.vi v0, v8, 0
429-
return LT.first * getLMULCost(LT.second) * 3;
385+
return LT.first * TLI->getLMULCost(LT.second) * 3;
430386
}
431387
// Example sequence:
432388
// vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -437,24 +393,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
437393
// vmv.v.x v8, a0
438394
// vmsne.vi v0, v8, 0
439395

440-
return LT.first * getLMULCost(LT.second) * 6;
396+
return LT.first * TLI->getLMULCost(LT.second) * 6;
441397
}
442398

443399
if (HasScalar) {
444400
// Example sequence:
445401
// vmv.v.x v8, a0
446-
return LT.first * getLMULCost(LT.second);
402+
return LT.first * TLI->getLMULCost(LT.second);
447403
}
448404

449405
// Example sequence:
450406
// vrgather.vi v9, v8, 0
451-
return LT.first * getVRGatherVICost(LT.second);
407+
return LT.first * TLI->getVRGatherVICost(LT.second);
452408
}
453409
case TTI::SK_Splice:
454410
// vslidedown+vslideup.
455411
// TODO: Multiplying by LT.first implies this legalizes into multiple copies
456412
// of similar code, but I think we expand through memory.
457-
return 2 * LT.first * getVSlideCost(LT.second);
413+
return 2 * LT.first * TLI->getVSlideCost(LT.second);
458414
case TTI::SK_Reverse: {
459415
// TODO: Cases to improve here:
460416
// * Illegal vector types
@@ -474,7 +430,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
474430
if (LT.second.isFixedLengthVector())
475431
// vrsub.vi has a 5 bit immediate field, otherwise an li suffices
476432
LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
477-
InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second);
433+
InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
478434
// Mask operation additionally required extend and truncate
479435
InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
480436
return LT.first * (LenCost + GatherCost + ExtendCost);
@@ -1393,7 +1349,7 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
13931349
// handles the LT.first term for us.
13941350
if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
13951351
LT.second.isVector())
1396-
BaseCost *= getLMULCost(LT.second);
1352+
BaseCost *= TLI->getLMULCost(LT.second);
13971353
return Cost + BaseCost;
13981354

13991355
}
@@ -1641,7 +1597,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
16411597
case ISD::FSUB:
16421598
case ISD::FMUL:
16431599
case ISD::FNEG: {
1644-
return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
1600+
return ConstantMatCost + TLI->getLMULCost(LT.second) * LT.first * 1;
16451601
}
16461602
default:
16471603
return ConstantMatCost +

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
4848
/// actual target hardware.
4949
unsigned getEstimatedVLFor(VectorType *Ty);
5050

51-
/// Return the cost of LMUL. The larger the LMUL, the higher the cost.
52-
InstructionCost getLMULCost(MVT VT) const;
53-
5451
/// Return the cost of accessing a constant pool entry of the specified
5552
/// type.
5653
InstructionCost getConstantPoolLoadCost(Type *Ty,
@@ -123,10 +120,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
123120
return ST->useRVVForFixedLengthVectors() ? 16 : 0;
124121
}
125122

126-
InstructionCost getVRGatherVVCost(MVT VT) const;
127-
InstructionCost getVRGatherVICost(MVT VT) const;
128-
InstructionCost getVSlideCost(MVT VT) const;
129-
130123
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
131124
ArrayRef<int> Mask,
132125
TTI::TargetCostKind CostKind, int Index,

0 commit comments

Comments
 (0)