@@ -34,6 +34,65 @@ static cl::opt<unsigned> SLPMaxVF(
                  " exclusively by SLP vectorizer."),
     cl::Hidden);
 
+InstructionCost
+RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
+                                      TTI::TargetCostKind CostKind) {
+  size_t NumInstr = OpCodes.size();
+  if (CostKind == TTI::TCK_CodeSize)
+    return NumInstr;
+  InstructionCost LMULCost = TLI->getLMULCost(VT);
+  if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
+    return LMULCost * NumInstr;
+  InstructionCost Cost = 0;
+  for (auto Op : OpCodes) {
+    switch (Op) {
+    case RISCV::VRGATHER_VI:
+      Cost += TLI->getVRGatherVICost(VT);
+      break;
+    case RISCV::VRGATHER_VV:
+      Cost += TLI->getVRGatherVVCost(VT);
+      break;
+    case RISCV::VSLIDEUP_VI:
+    case RISCV::VSLIDEDOWN_VI:
+      Cost += TLI->getVSlideVICost(VT);
+      break;
+    case RISCV::VSLIDEUP_VX:
+    case RISCV::VSLIDEDOWN_VX:
+      Cost += TLI->getVSlideVXCost(VT);
+      break;
+    case RISCV::VREDMAX_VS:
+    case RISCV::VREDMIN_VS:
+    case RISCV::VREDMAXU_VS:
+    case RISCV::VREDMINU_VS:
+    case RISCV::VREDSUM_VS:
+    case RISCV::VREDAND_VS:
+    case RISCV::VREDOR_VS:
+    case RISCV::VREDXOR_VS:
+    case RISCV::VFREDMAX_VS:
+    case RISCV::VFREDMIN_VS:
+    case RISCV::VFREDUSUM_VS: {
+      unsigned VL = VT.getVectorMinNumElements();
+      if (!VT.isFixedLengthVector())
+        VL *= *getVScaleForTuning();
+      Cost += Log2_32_Ceil(VL);
+      break;
+    }
+    case RISCV::VFREDOSUM_VS: {
+      unsigned VL = VT.getVectorMinNumElements();
+      if (!VT.isFixedLengthVector())
+        VL *= *getVScaleForTuning();
+      Cost += VL;
+      break;
+    }
+    case RISCV::VMV_S_X:
+      // FIXME: VMV_S_X doesn't use LMUL, the cost should be 1
+    default:
+      Cost += LMULCost;
+    }
+  }
+  return Cost;
+}
+
 InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                             TTI::TargetCostKind CostKind) {
   assert(Ty->isIntegerTy() &&
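For reference, the helper's dispatch is easy to sanity-check outside of LLVM. The block below is a minimal standalone model, not the real API: modelCost, Opcode, and the cost constants are hypothetical stand-ins for the TLI hooks (getLMULCost and friends), and the quadratic gather cost is an assumption about how those hooks behave. What it mirrors is the logic added above: code-size queries count instructions, other non-throughput/latency queries fall back to LMUL cost times the instruction count, and throughput/latency queries price each opcode, costing tree reductions at ceil(log2(VL)) and the ordered float sum at VL (for scalable types, VL is estimated as the minimum element count times the tuned vscale).

    #include <cmath>
    #include <cstdio>
    #include <vector>

    enum class CostKind { RecipThroughput, Latency, CodeSize, SizeAndLatency };
    enum class Opcode { VRGatherVV, VRedSumVS, VFRedOSumVS, Other };

    double modelCost(const std::vector<Opcode> &Ops, double LMULCost,
                     unsigned VL, CostKind Kind) {
      if (Kind == CostKind::CodeSize)
        return Ops.size();                  // one instruction per opcode
      if (Kind != CostKind::RecipThroughput && Kind != CostKind::Latency)
        return LMULCost * Ops.size();       // conservative default
      double Cost = 0;
      for (Opcode Op : Ops) {
        switch (Op) {
        case Opcode::VRGatherVV:
          Cost += LMULCost * LMULCost;      // assumed: gathers scale ~LMUL^2
          break;
        case Opcode::VRedSumVS:
          Cost += std::ceil(std::log2(VL)); // tree reduction: log2(VL) levels
          break;
        case Opcode::VFRedOSumVS:
          Cost += VL;                       // ordered FP sum: serial chain
          break;
        default:
          Cost += LMULCost;
        }
      }
      return Cost;
    }

    int main() {
      // A sum reduction over 8 lanes costs ceil(log2(8)) == 3 at throughput.
      std::printf("%g\n", modelCost({Opcode::VRedSumVS}, 2.0, 8,
                                    CostKind::RecipThroughput));
    }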
@@ -281,7 +340,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         // Example sequence:
         // vnsrl.wi v10, v8, 0
         if (equal(DeinterleaveMask, Mask))
-          return LT.first * TLI->getLMULCost(LT.second);
+          return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
+                                                    LT.second, CostKind);
       }
     }
   }
@@ -292,7 +352,8 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
           LT.second.getVectorNumElements() <= 256)) {
       VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
-      return IndexCost + TLI->getVRGatherVVCost(LT.second);
+      return IndexCost +
+             getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
     }
     [[fallthrough]];
   }
@@ -310,7 +371,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
       InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
       InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
-      return 2 * IndexCost + 2 * TLI->getVRGatherVVCost(LT.second) + MaskCost;
+      return 2 * IndexCost +
+             getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
+                                     LT.second, CostKind) +
+             MaskCost;
     }
     [[fallthrough]];
   }
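As a quick sanity check of this decomposition, assume unit costs for an m1 type (each constant-pool load and each vrgather.vv taken as 1; illustrative numbers only, the real values come from getConstantPoolLoadCost and the TLI hooks): two index loads, two gathers, and the mask load for the final merge give five units total.

    #include <cstdio>
    int main() {
      int IndexCost = 1, GatherCost = 1, MaskCost = 1; // assumed unit costs
      // two index-vector loads + two vrgather.vv + the mask for the merge
      std::printf("%d\n", 2 * IndexCost + 2 * GatherCost + MaskCost); // 5
    }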
@@ -365,19 +429,24 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     // Example sequence:
     // vsetivli     zero, 4, e8, mf2, tu, ma (ignored)
     // vslidedown.vi  v8, v9, 2
-    return LT.first * TLI->getVSlideCost(LT.second);
+    return LT.first *
+           getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
   case TTI::SK_InsertSubvector:
     // Example sequence:
     // vsetivli     zero, 4, e8, mf2, tu, ma (ignored)
     // vslideup.vi  v8, v9, 2
-    return LT.first * TLI->getVSlideCost(LT.second);
+    return LT.first *
+           getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
   case TTI::SK_Select: {
     // Example sequence:
     // li           a0, 90
     // vsetivli     zero, 8, e8, mf2, ta, ma (ignored)
     // vmv.s.x      v0, a0
     // vmerge.vvm   v8, v9, v8, v0
-    return LT.first * 3 * TLI->getLMULCost(LT.second);
+    return LT.first *
+           (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for li
+            getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
+                                    LT.second, CostKind));
   }
   case TTI::SK_Broadcast: {
     bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
@@ -389,7 +458,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
         //   vsetivli zero, 2, e8, mf8, ta, ma (ignored)
         //   vmv.v.x  v8, a0
         //   vmsne.vi v0, v8, 0
-        return LT.first * TLI->getLMULCost(LT.second) * 3;
+        return LT.first *
+               (TLI->getLMULCost(LT.second) + // FIXME: should be 1 for andi
+                getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
+                                        LT.second, CostKind));
       }
       // Example sequence:
       //   vsetivli zero, 2, e8, mf8, ta, mu (ignored)
@@ -400,24 +472,40 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
       //   vmv.v.x  v8, a0
       //   vmsne.vi v0, v8, 0
 
-      return LT.first * TLI->getLMULCost(LT.second) * 6;
+      return LT.first *
+             (TLI->getLMULCost(LT.second) + // FIXME: this should be 1 for andi
+              TLI->getLMULCost(
+                  LT.second) + // FIXME: vmv.x.s is the same as extractelement
+              getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
+                                       RISCV::VMV_V_X, RISCV::VMSNE_VI},
+                                      LT.second, CostKind));
     }
 
     if (HasScalar) {
       // Example sequence:
       //   vmv.v.x v8, a0
-      return LT.first * TLI->getLMULCost(LT.second);
+      return LT.first *
+             getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
     }
 
     // Example sequence:
     //   vrgather.vi v9, v8, 0
-    return LT.first * TLI->getVRGatherVICost(LT.second);
+    return LT.first *
+           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
   }
-  case TTI::SK_Splice:
+  case TTI::SK_Splice: {
     // vslidedown+vslideup.
     // TODO: Multiplying by LT.first implies this legalizes into multiple copies
     // of similar code, but I think we expand through memory.
-    return 2 * LT.first * TLI->getVSlideCost(LT.second);
+    // Assigning a braced list to an ArrayRef would leave it pointing at a
+    // dead temporary, so pick the opcodes through a plain local array.
+    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
+    if (Index >= 0 && Index < 32)
+      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
+    else if (Index < 0 && Index > -32)
+      Opcodes[1] = RISCV::VSLIDEUP_VI;
+    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
+  }
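Rationale for the opcode pairs: a splice slides the first source down by the splice amount and slides the second source up into the vacated tail. For a non-negative Index the slide-down amount is the constant Index, usable as vslide's 5-bit unsigned immediate when below 32, while the slide-up amount depends on VL and needs a scalar register; for a negative Index the roles flip. A standalone sketch of the selection, with mnemonic strings standing in for the RISCV::VSLIDE{DOWN,UP}_{VI,VX} opcodes:

    #include <array>
    #include <cstdio>

    std::array<const char *, 2> spliceOpcodes(int Index) {
      if (Index >= 0 && Index < 32)
        return {"vslidedown.vi", "vslideup.vx"}; // down amount fits uimm5
      if (Index < 0 && Index > -32)
        return {"vslidedown.vx", "vslideup.vi"}; // up amount (-Index) fits uimm5
      return {"vslidedown.vx", "vslideup.vx"};   // both amounts need a register
    }

    int main() {
      for (int Index : {5, -5, 40, -40}) {
        auto [Down, Up] = spliceOpcodes(Index);
        std::printf("%4d -> %s + %s\n", Index, Down, Up);
      }
    }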
   case TTI::SK_Reverse: {
     // TODO: Cases to improve here:
     // * Illegal vector types
@@ -437,7 +525,9 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
     if (LT.second.isFixedLengthVector())
       // vrsub.vi has a 5-bit immediate field, otherwise an li suffices
       LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
-    InstructionCost GatherCost = 2 + TLI->getVRGatherVVCost(LT.second);
+    // FIXME: replace the constant `2` below with the cost of {VID_V, VRSUB_VX}
+    InstructionCost GatherCost =
+        2 + getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
     // Mask operation additionally requires an extend and a truncate
     InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
     return LT.first * (LenCost + GatherCost + ExtendCost);
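A worked example of the reverse cost under assumed per-instruction costs (LMULCost of 1 for an m1 type, and vrgather.vv assumed to also cost 1 at that LMUL): reversing a legal <16 x i8> prices out at 3, matching the three-instruction vid.v / vrsub.vi / vrgather.vv lowering.

    #include <cstdio>
    int main() {
      int LTFirst = 1;        // the type legalizes to a single register group
      int LenCost = 0;        // 16 - 1 == 15 fits vrsub.vi's 5-bit immediate
      int GatherCost = 2 + 1; // vid.v + vrsub.vi (the constant 2) + vrgather.vv
      int ExtendCost = 0;     // element type is not i1
      std::printf("%d\n", LTFirst * (LenCost + GatherCost + ExtendCost)); // 3
    }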