Skip to content

Commit 2c0add9

Browse files
authored
[TTI] Return a more sensible cost for histogram intrinsic. (#97397)
This is just an initial cost, making it invalid for any target which doesn't specifically return a cost for now. Also adds an AArch64 specific cost check. We will need to improve that later, e.g. by returning a scalarization cost for generic targets and possibly introducing a new TTI method, at least once LoopVectorize has changed its cost model. The reason is that the histogram intrinsic also effectively contains a gather and scatter, and we will need details of the addressing to determine an appropriate cost for that.
1 parent 81660bb commit 2c0add9

File tree

3 files changed

+154
-0
lines changed

3 files changed

+154
-0
lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,9 @@ class TargetTransformInfoImplBase {
728728
switch (ICA.getID()) {
729729
default:
730730
break;
731+
case Intrinsic::experimental_vector_histogram_add:
732+
// For now, we want explicit support from the target for histograms.
733+
return InstructionCost::getInvalid();
731734
case Intrinsic::allow_runtime_check:
732735
case Intrinsic::allow_ubsan_check:
733736
case Intrinsic::annotation:

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ static cl::opt<bool> EnableOrLikeSelectOpt("enable-aarch64-or-like-select",
6161
static cl::opt<bool> EnableLSRCostOpt("enable-aarch64-lsr-cost-opt",
6262
cl::init(true), cl::Hidden);
6363

64+
// Base cost reported for a histogram intrinsic that is accepted as a histcnt.
// A complete guess as to a reasonable cost; the value defaults to 8 and can be
// overridden with the hidden -aarch64-base-histcnt-cost option.
65+
static cl::opt<unsigned>
66+
BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden,
67+
cl::desc("The cost of a histcnt instruction"));
68+
6469
namespace {
6570
class TailFoldingOption {
6671
// These bitfields will only ever be set to something non-zero in operator=,
@@ -508,11 +513,39 @@ static bool isUnpackedVectorVT(EVT VecVT) {
508513
VecVT.getSizeInBits().getKnownMinValue() < AArch64::SVEBitsPerBlock;
509514
}
510515

516+
/// Cost of an llvm.experimental.vector.histogram.add call described by \p ICA.
///
/// Argument type 0 is the vector of bucket pointers and argument type 1 is the
/// bucket element type. Returns BaseHistCntCost when the bucket elements are
/// 32b or 64b wide and the operation fits in one scalable vector with a
/// 128-bit minimum size; returns an invalid cost otherwise.
static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) {
  Type *BucketPtrsTy = ICA.getArgTypes()[0]; // Type of vector of pointers
  Type *EltTy = ICA.getArgTypes()[1];        // Type of bucket elements

  // Only allow (32b and 64b) integers or pointers for now...
  // NOTE(review): getScalarSizeInBits() is 0 for pointer types, so pointer
  // elements look like they are always rejected by the size test - confirm.
  const unsigned EltSize = EltTy->getScalarSizeInBits();
  if ((!EltTy->isIntegerTy() && !EltTy->isPointerTy()) ||
      (EltSize != 32 && EltSize != 64))
    return InstructionCost::getInvalid();

  // FIXME: Hacky check for legal vector types. We can promote smaller types
  //        but we cannot legalize vectors via splitting for histcnt.
  // FIXME: We should be able to generate histcnt for fixed-length vectors
  //        using ptrue with a specific VL.
  if (VectorType *VTy = dyn_cast<VectorType>(BucketPtrsTy)) {
    const unsigned MinLanes = VTy->getElementCount().getKnownMinValue();
    if (!VTy->isScalableTy() || (MinLanes != 2 && MinLanes != 4))
      return InstructionCost::getInvalid();

    // Size the operation as lane count x bucket element width. The pointer
    // vector's own getPrimitiveSizeInBits() is 0 (pointers are not primitive
    // types), so it cannot reject vectors that would need splitting, e.g.
    // <vscale x 4 x ptr> with i64 bucket elements.
    if (MinLanes * EltSize > 128)
      return InstructionCost::getInvalid();
  }

  return InstructionCost(BaseHistCntCost);
}
539+
511540
InstructionCost
512541
AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
513542
TTI::TargetCostKind CostKind) {
514543
auto *RetTy = ICA.getReturnType();
515544
switch (ICA.getID()) {
545+
case Intrinsic::experimental_vector_histogram_add:
546+
if (!ST->hasSVE2())
547+
return InstructionCost::getInvalid();
548+
return getHistogramCost(ICA);
516549
case Intrinsic::umin:
517550
case Intrinsic::umax:
518551
case Intrinsic::smin:

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,123 @@ define void @masked_scatter_v1i128(<1 x i128> %data, <1 x ptr> %ptrs, <1 x i1> %
909909
ret void
910910
}
911911

912+
; Scalable <vscale x 2 x ptr> buckets with i64 increments, using attribute group #3 (+sve,+sve2): the expected cost is 8.
define void @histogram_nxv2i64(<vscale x 2 x ptr> %buckets, <vscale x 2 x i1> %mask) #3 {
913+
; CHECK-LABEL: 'histogram_nxv2i64'
914+
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
915+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
916+
;
917+
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv2i64'
918+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
919+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
920+
;
921+
call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> %buckets, i64 1, <vscale x 2 x i1> %mask)
922+
ret void
923+
}
924+
925+
; Scalable <vscale x 4 x ptr> buckets with i32 increments, using attribute group #3 (+sve,+sve2): the expected cost is 8.
define void @histogram_nxv4i32(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) #3 {
926+
; CHECK-LABEL: 'histogram_nxv4i32'
927+
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
928+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
929+
;
930+
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i32'
931+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
932+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
933+
;
934+
call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> %buckets, i32 1, <vscale x 4 x i1> %mask)
935+
ret void
936+
}
937+
938+
; Scalable <vscale x 8 x ptr> buckets with i16 increments; the function has no attribute group: the expected cost is Invalid.
define void @histogram_nxv8i16(<vscale x 8 x ptr> %buckets, <vscale x 8 x i1> %mask) {
939+
; CHECK-LABEL: 'histogram_nxv8i16'
940+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
941+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
942+
;
943+
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv8i16'
944+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
945+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
946+
;
947+
call void @llvm.experimental.vector.histogram.add.nxv8p0.i16(<vscale x 8 x ptr> %buckets, i16 1, <vscale x 8 x i1> %mask)
948+
ret void
949+
}
950+
951+
; Scalable <vscale x 16 x ptr> buckets with i8 increments; the function has no attribute group: the expected cost is Invalid.
define void @histogram_nxv16i8(<vscale x 16 x ptr> %buckets, <vscale x 16 x i1> %mask) {
952+
; CHECK-LABEL: 'histogram_nxv16i8'
953+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
954+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
955+
;
956+
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv16i8'
957+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
958+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
959+
;
960+
call void @llvm.experimental.vector.histogram.add.nxv16p0.i8(<vscale x 16 x ptr> %buckets, i8 1, <vscale x 16 x i1> %mask)
961+
ret void
962+
}
963+
964+
; Fixed-length <2 x ptr> buckets with i64 increments; the function has no attribute group: the expected cost is Invalid.
define void @histogram_v2i64(<2 x ptr> %buckets, <2 x i1> %mask) {
965+
; CHECK-LABEL: 'histogram_v2i64'
966+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
967+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
968+
;
969+
; TYPE_BASED_ONLY-LABEL: 'histogram_v2i64'
970+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
971+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
972+
;
973+
call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 1, <2 x i1> %mask)
974+
ret void
975+
}
976+
977+
; Fixed-length <4 x ptr> buckets with i32 increments; the function has no attribute group: the expected cost is Invalid.
define void @histogram_v4i32(<4 x ptr> %buckets, <4 x i1> %mask) {
978+
; CHECK-LABEL: 'histogram_v4i32'
979+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
980+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
981+
;
982+
; TYPE_BASED_ONLY-LABEL: 'histogram_v4i32'
983+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
984+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
985+
;
986+
call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
987+
ret void
988+
}
989+
990+
; Fixed-length <8 x ptr> buckets with i16 increments; the function has no attribute group: the expected cost is Invalid.
define void @histogram_v8i16(<8 x ptr> %buckets, <8 x i1> %mask) {
991+
; CHECK-LABEL: 'histogram_v8i16'
992+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
993+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
994+
;
995+
; TYPE_BASED_ONLY-LABEL: 'histogram_v8i16'
996+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
997+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
998+
;
999+
call void @llvm.experimental.vector.histogram.add.v8p0.i16(<8 x ptr> %buckets, i16 1, <8 x i1> %mask)
1000+
ret void
1001+
}
1002+
1003+
; Fixed-length <16 x ptr> buckets with i8 increments; the function has no attribute group: the expected cost is Invalid.
define void @histogram_v16i8(<16 x ptr> %buckets, <16 x i1> %mask) {
1004+
; CHECK-LABEL: 'histogram_v16i8'
1005+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
1006+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1007+
;
1008+
; TYPE_BASED_ONLY-LABEL: 'histogram_v16i8'
1009+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
1010+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1011+
;
1012+
call void @llvm.experimental.vector.histogram.add.v16p0.i8(<16 x ptr> %buckets, i8 1, <16 x i1> %mask)
1013+
ret void
1014+
}
1015+
1016+
; Scalable <vscale x 4 x ptr> buckets with i64 increments: the expected cost is Invalid.
; NOTE(review): this function does not use attribute group #3 (+sve,+sve2), so the Invalid
; result may come from SVE2 being unavailable rather than from the vector-width check - confirm.
define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %mask) {
1017+
; CHECK-LABEL: 'histogram_nxv4i64'
1018+
; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1019+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1020+
;
1021+
; TYPE_BASED_ONLY-LABEL: 'histogram_nxv4i64'
1022+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1023+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1024+
;
1025+
call void @llvm.experimental.vector.histogram.add.nxv4p0.i64(<vscale x 4 x ptr> %buckets, i64 1, <vscale x 4 x i1> %mask)
1026+
ret void
1027+
}
1028+
9121029
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
9131030
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
9141031
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
@@ -949,3 +1066,4 @@ declare void @llvm.masked.scatter.v1i128.v1p0(<1 x i128> %data, <1 x ptr> %ptrs,
9491066
attributes #0 = { "target-features"="+sve,+bf16" }
9501067
attributes #1 = { "target-features"="+sve" vscale_range(1,16) }
9511068
attributes #2 = { "target-features"="+sve" vscale_range(2, 16) }
1069+
attributes #3 = { "target-features"="+sve,+sve2" vscale_range(1,16) }

0 commit comments

Comments
 (0)