Skip to content

Commit 5c6453d

Browse files
committed
[AArch64] Add BF16 REV costs.
As with the existing FP16 costs, a full reverse shuffle of a BF16 vector can be lowered with a single REV64 (v4bf16) or with REV64 followed by EXT (v8bf16).
1 parent 19a39e9 commit 5c6453d

File tree

3 files changed

+9
-7
lines changed

3 files changed

+9
-7
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5023,9 +5023,11 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
50235023
{TTI::SK_Reverse, MVT::v4f32, 2}, // REV64; EXT
50245024
{TTI::SK_Reverse, MVT::v2f64, 1}, // EXT
50255025
{TTI::SK_Reverse, MVT::v8f16, 2}, // REV64; EXT
5026+
{TTI::SK_Reverse, MVT::v8bf16, 2}, // REV64; EXT
50265027
{TTI::SK_Reverse, MVT::v8i16, 2}, // REV64; EXT
50275028
{TTI::SK_Reverse, MVT::v16i8, 2}, // REV64; EXT
50285029
{TTI::SK_Reverse, MVT::v4f16, 1}, // REV64
5030+
{TTI::SK_Reverse, MVT::v4bf16, 1}, // REV64
50295031
{TTI::SK_Reverse, MVT::v4i16, 1}, // REV64
50305032
{TTI::SK_Reverse, MVT::v8i8, 1}, // REV64
50315033
// Splice can all be lowered as `ext`.
@@ -5039,8 +5041,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(
50395041
{TTI::SK_Splice, MVT::v8bf16, 1},
50405042
{TTI::SK_Splice, MVT::v8i16, 1},
50415043
{TTI::SK_Splice, MVT::v16i8, 1},
5042-
{TTI::SK_Splice, MVT::v4bf16, 1},
50435044
{TTI::SK_Splice, MVT::v4f16, 1},
5045+
{TTI::SK_Splice, MVT::v4bf16, 1},
50445046
{TTI::SK_Splice, MVT::v4i16, 1},
50455047
{TTI::SK_Splice, MVT::v8i8, 1},
50465048
// Broadcast shuffle kinds for scalable vectors

llvm/test/Analysis/CostModel/AArch64/shuffle-reverse.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ define void @reverse() {
2727
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = shufflevector <4 x half> undef, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2828
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16 = shufflevector <8 x half> undef, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2929
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
30-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> <i32 1, i32 0>
30+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = shufflevector <2 x bfloat> undef, <2 x bfloat> undef, <2 x i32> <i32 1, i32 0>
3131
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
32-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
33-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
32+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
33+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3434
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = shufflevector <2 x float> undef, <2 x float> undef, <2 x i32> <i32 1, i32 0>
3535
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3636
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -94,7 +94,7 @@ define void @vrev64() {
9494
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16 = shufflevector <16 x half> undef, <16 x half> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
9595
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = shufflevector <4 x bfloat> undef, <4 x bfloat> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
9696
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8bf16 = shufflevector <8 x bfloat> undef, <8 x bfloat> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
97-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
97+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16bf16 = shufflevector <16 x bfloat> undef, <16 x bfloat> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
9898
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32 = shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
9999
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = shufflevector <8 x float> undef, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
100100
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32 = shufflevector <16 x float> undef, <16 x float> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>

llvm/test/Analysis/CostModel/AArch64/vector-reverse.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ define void @vector_reverse() #0{
2121
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %12 = call <8 x float> @llvm.vector.reverse.v8f32(<8 x float> undef)
2222
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <2 x double> @llvm.vector.reverse.v2f64(<2 x double> undef)
2323
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %14 = call <4 x double> @llvm.vector.reverse.v4f64(<4 x double> undef)
24-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %15 = call <8 x bfloat> @llvm.vector.reverse.v8bf16(<8 x bfloat> undef)
25-
; CHECK-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %16 = call <16 x bfloat> @llvm.vector.reverse.v16bf16(<16 x bfloat> undef)
24+
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <8 x bfloat> @llvm.vector.reverse.v8bf16(<8 x bfloat> undef)
25+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %16 = call <16 x bfloat> @llvm.vector.reverse.v16bf16(<16 x bfloat> undef)
2626
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
2727
;
2828

0 commit comments

Comments
 (0)