@@ -34,6 +34,51 @@ static cl::opt<unsigned> SLPMaxVF(
         " exclusively by SLP vectorizer."),
     cl::Hidden);
 
+InstructionCost
+RISCVTTIImpl::getRISCVInstructionCost(RISCVInstruction Inst, MVT VT,
+                                      unsigned NumInstr,
+                                      TTI::TargetCostKind CostKind) {
+  if (CostKind == TTI::TCK_CodeSize)
+    return NumInstr;
+
+  InstructionCost LMUL = TLI->getLMULCost(VT);
+  InstructionCost Cost = LMUL * NumInstr;
+
+  if ((CostKind == TTI::TCK_RecipThroughput) ||
+      (CostKind == TTI::TCK_Latency)) {
+    switch (Inst) {
+    case RISCVInstruction::VRGATHER_VI:
+      return NumInstr * TLI->getVRGatherVICost(VT);
+    case RISCVInstruction::VRGATHER_VV:
+      return NumInstr * TLI->getVRGatherVVCost(VT);
+    case RISCVInstruction::VSLIDE:
+      return NumInstr * TLI->getVSlideCost(VT);
+    case RISCVInstruction::VSIMPLE_INT_RED:
+    case RISCVInstruction::VMINMAX_INTFP_RED:
+    case RISCVInstruction::VUNORD_FP_RED: {
+      unsigned VL = VT.getVectorMinNumElements();
+      if (!VT.isFixedLengthVector()) {
+        VL *= *getVScaleForTuning();
+      }
+      return Log2_32_Ceil(VL);
+    }
+    case RISCVInstruction::VORD_FP_RED: {
+      unsigned VL = VT.getVectorMinNumElements();
+      if (!VT.isFixedLengthVector()) {
+        VL *= *getVScaleForTuning();
+      }
+      return VL;
+    }
+    case RISCVInstruction::VMERGE:
+    case RISCVInstruction::VMV:
+    case RISCVInstruction::VSIMPLE_INT:
+    case RISCVInstruction::VNARROWING:
+      return Cost;
+    }
+  }
+  return Cost;
+}
+
 InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                             TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
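To make the dispatch above easier to follow, here is a small standalone model of what the new hook computes, written outside the LLVM tree: TCK_CodeSize only counts instructions, the vrgather/vslide classes defer to the per-instruction TLI hooks, the unordered reductions are priced as a log2(VL)-deep reduction tree (with VL scaled by vscale for scalable types in the real patch), and everything else falls back to LMUL * NumInstr. The ordered FP reduction (VORD_FP_RED), which is linear in VL, is omitted here, and the per-class numbers are placeholder assumptions rather than values taken from the cost model.

```cpp
// Standalone model of the getRISCVInstructionCost dispatch; the per-class
// costs are assumed placeholders (the real values come from TLI->get*Cost(VT)
// and depend on LMUL).
#include <cmath>
#include <cstdio>

enum class CostKind { CodeSize, RecipThroughput, Latency };
enum class Inst { VSlide, VRGatherVV, VUnordFPRed, VSimpleInt };

double modelCost(Inst I, unsigned VL, unsigned NumInstr, CostKind CK) {
  const double LMULCost = 1.0;       // stand-in for TLI->getLMULCost(VT)
  const double VSlideCost = 1.0;     // stand-in for TLI->getVSlideCost(VT)
  const double VRGatherVVCost = 1.0; // stand-in for TLI->getVRGatherVVCost(VT)

  if (CK == CostKind::CodeSize)
    return NumInstr;                 // code size: just count the instructions

  switch (I) {
  case Inst::VSlide:
    return NumInstr * VSlideCost;
  case Inst::VRGatherVV:
    return NumInstr * VRGatherVVCost;
  case Inst::VUnordFPRed:
    // Unordered reductions are modelled as a log2(VL)-deep reduction tree.
    return std::ceil(std::log2((double)VL));
  case Inst::VSimpleInt:
    break;
  }
  return NumInstr * LMULCost;        // default: LMUL-scaled per instruction
}

int main() {
  // SK_Splice lowers to vslidedown + vslideup, i.e. two VSLIDE instructions.
  std::printf("splice (throughput): %g\n",
              modelCost(Inst::VSlide, /*VL=*/8, /*NumInstr=*/2,
                        CostKind::RecipThroughput));
  // An unordered FP reduction over 16 elements costs ceil(log2(16)) = 4.
  std::printf("fp reduce (latency): %g\n",
              modelCost(Inst::VUnordFPRed, /*VL=*/16, /*NumInstr=*/1,
                        CostKind::Latency));
}
```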
@@ -279,7 +324,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
           // Example sequence:
           //   vnsrl.wi   v10, v8, 0
           if (equal(DeinterleaveMask, Mask))
-            return LT.first * TLI->getLMULCost(LT.second);
+            return LT.first *
+                   getRISCVInstructionCost(RISCVInstruction::VNARROWING,
+                                           LT.second, 1, CostKind);
         }
       }
     }
@@ -290,7 +337,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
          LT.second.getVectorNumElements() <= 256)) {
       VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
-      return IndexCost + TLI->getVRGatherVVCost(LT.second);
+      return IndexCost +
+             getRISCVInstructionCost(RISCVInstruction::VRGATHER_VV, LT.second,
+                                     1, CostKind);
     }
     [[fallthrough]];
   }
@@ -308,7 +357,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
       InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
-      return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
+      return 2 * IndexCost +
+             getRISCVInstructionCost(RISCVInstruction::VRGATHER_VV, LT.second,
+                                     2, CostKind) +
+             MaskCost;
     }
     [[fallthrough]];
   }
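Reading the two-source permute case above as a formula: cost = 2 * IndexCost + (two vrgather.vv priced through the new hook) + MaskCost, where each index vector and the i1 select mask come from a constant-pool load. A minimal arithmetic sketch with assumed per-item costs, not numbers from the patch:

```cpp
// Placeholder arithmetic for the SK_PermuteTwoSrc costing above; in the patch
// IndexCost/MaskCost come from getConstantPoolLoadCost and the gather term
// from getRISCVInstructionCost(VRGATHER_VV, LT.second, 2, CostKind).
#include <cstdio>
int main() {
  const double IndexCost = 2.0;  // assumed cost of loading one index vector
  const double MaskCost = 1.0;   // assumed cost of loading the i1 select mask
  const double VRGatherVV = 1.0; // assumed per-gather cost at this LMUL
  const unsigned NumGathers = 2; // one vrgather.vv per source vector
  double Cost = 2 * IndexCost + NumGathers * VRGatherVV + MaskCost;
  std::printf("two-source permute cost: %g\n", Cost);
}
```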
@@ -363,19 +415,26 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslidedown.vi v8, v9, 2
-    return LT.first * TLI->getVSlideCost(LT.second);
+    return LT.first * getRISCVInstructionCost(RISCVInstruction::VSLIDE,
+                                              LT.second, 1, CostKind);
   case TTI::SK_InsertSubvector:
     // Example sequence:
     // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
     // vslideup.vi v8, v9, 2
-    return LT.first * TLI->getVSlideCost(LT.second);
+    return LT.first * getRISCVInstructionCost(RISCVInstruction::VSLIDE,
+                                              LT.second, 1, CostKind);
   case TTI::SK_Select: {
     // Example sequence:
     // li a0, 90
     // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
     // vmv.s.x v0, a0
     // vmerge.vvm v8, v9, v8, v0
-    return LT.first * 3 * TLI->getLMULCost(LT.second);
+    return LT.first *
+           (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
+            getRISCVInstructionCost(RISCVInstruction::VMV, LT.second, 1,
+                                    CostKind) +
+            getRISCVInstructionCost(RISCVInstruction::VMERGE, LT.second, 1,
+                                    CostKind));
   }
   case TTI::SK_Broadcast: {
     bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
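The SK_Select change above is the pattern repeated through the rest of the patch: a lumped `N * getLMULCost` is replaced by the sum of the instructions in the documented lowering, here li + vmv.s.x + vmerge.vvm, with the scalar li still billed at LMULCost until the FIXME is addressed. A small numeric sketch with assumed per-instruction costs follows; the numbers are illustrative only.

```cpp
// Placeholder numbers; in the patch the vmv and vmerge terms come from
// getRISCVInstructionCost(VMV/VMERGE, LT.second, 1, CostKind).
#include <cstdio>
int main() {
  const double LTFirst = 1.0;    // number of legalized copies (LT.first)
  const double LMULCost = 2.0;   // assume an m2 type, so LMULCost == 2
  const double VMVCost = 2.0;    // assumed vmv.s.x cost at this LMUL
  const double VMergeCost = 2.0; // assumed vmerge.vvm cost at this LMUL
  double OldCost = LTFirst * 3 * LMULCost;
  double NewCost = LTFirst * (LMULCost /*li, see FIXME*/ + VMVCost + VMergeCost);
  // Identical while every hook returns LMULCost; they diverge once the
  // per-instruction hooks are tuned independently.
  std::printf("old=%g new=%g\n", OldCost, NewCost);
}
```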
@@ -387,7 +446,12 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         // vsetivli zero, 2, e8, mf8, ta, ma (ignored)
         // vmv.v.x v8, a0
         // vmsne.vi v0, v8, 0
-        return LT.first * TLI->getLMULCost(LT.second) * 3;
+        return LT.first *
+               (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
+                getRISCVInstructionCost(RISCVInstruction::VMV, LT.second, 1,
+                                        CostKind) +
+                getRISCVInstructionCost(RISCVInstruction::VSIMPLE_INT,
+                                        LT.second, 1, CostKind));
       }
       // Example sequence:
       // vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -398,24 +462,34 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       // vmv.v.x v8, a0
       // vmsne.vi v0, v8, 0
 
-      return LT.first * TLI->getLMULCost(LT.second) * 6;
+      return LT.first *
+             (TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
+              getRISCVInstructionCost(RISCVInstruction::VMV, LT.second, 3,
+                                      CostKind) +
+              getRISCVInstructionCost(RISCVInstruction::VMERGE, LT.second, 1,
+                                      CostKind) +
+              getRISCVInstructionCost(RISCVInstruction::VSIMPLE_INT, LT.second,
+                                      1, CostKind));
     }
 
     if (HasScalar) {
       // Example sequence:
       //   vmv.v.x v8, a0
-      return LT.first * TLI->getLMULCost(LT.second);
+      return LT.first * getRISCVInstructionCost(RISCVInstruction::VMV,
+                                                LT.second, 1, CostKind);
     }
 
     // Example sequence:
     //   vrgather.vi v9, v8, 0
-    return LT.first * TLI->getVRGatherVICost(LT.second);
+    return LT.first * getRISCVInstructionCost(RISCVInstruction::VRGATHER_VI,
+                                              LT.second, 1, CostKind);
   }
   case TTI::SK_Splice:
     // vslidedown+vslideup.
     // TODO: Multiplying by LT.first implies this legalizes into multiple copies
     // of similar code, but I think we expand through memory.
-    return 2 * LT.first * TLI->getVSlideCost(LT.second);
+    return LT.first * getRISCVInstructionCost(RISCVInstruction::VSLIDE,
+                                              LT.second, 2, CostKind);
   case TTI::SK_Reverse: {
     // TODO: Cases to improve here:
     // * Illegal vector types
@@ -435,7 +509,11 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (LT.second.isFixedLengthVector())
       // vrsub.vi has a 5 bit immediate field, otherwise an li suffices
      LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
-    InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
+    // FIXME: replace the constant `2` below with cost of VSIMPLE_INT (vid.v &
+    // vrsub.vx)
+    InstructionCost GatherCost =
+        2 + getRISCVInstructionCost(RISCVInstruction::VRGATHER_VV, LT.second, 1,
+                                    CostKind);
     // Mask operation additionally required extend and truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
     return LT.first * (LenCost + GatherCost + ExtendCost);
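Putting the SK_Reverse pieces together: LenCost pays for an li only when the element count minus one no longer fits vrsub.vi's signed 5-bit immediate, GatherCost is the hard-coded vid.v/vrsub pair (the `2` the FIXME wants to replace) plus one vrgather.vv priced through the new hook, and ExtendCost adds three instructions when an i1 vector has to be extended and truncated. A sketch of the final sum with assumed costs:

```cpp
// Placeholder walk-through of `LT.first * (LenCost + GatherCost + ExtendCost)`
// for reversing a fixed-length vector of 16 x i8; the per-instruction values
// are assumptions, not numbers from the cost model.
#include <cstdio>
int main() {
  const double LTFirst = 1.0;    // assume the type legalizes to one register group
  const int NumElts = 16;
  // vrsub.vi takes a signed 5-bit immediate (isInt<5>), so 15 fits: no li needed.
  double LenCost = (NumElts - 1 >= -16 && NumElts - 1 <= 15) ? 0 : 1;
  const double VRGatherVV = 1.0; // assumed vrgather.vv cost at this LMUL
  double GatherCost = 2 /*vid.v + vrsub.vx, see FIXME*/ + VRGatherVV;
  bool IsI1Mask = false;         // element type is i8, not i1
  double ExtendCost = IsI1Mask ? 3 : 0;
  std::printf("reverse cost: %g\n", LTFirst * (LenCost + GatherCost + ExtendCost));
}
```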