Skip to content

Commit 20864d2

Browse files
authored
[ValueTypes][RISCV] Add v1bf16 type (#111112)
When trying to add RISC-V fadd reduction cost model tests for bf16, I noticed a crash when the vector was of <1 x bfloat>. It turns out that this was being scalarized because unlike f16/f32/f64, there's no v1bf16 value type, and the existing cost model code assumed that the legalized type would always be a vector. This adds v1bf16 to bring bf16 in line with the other fp types. It also adds some more RISC-V bf16 reduction tests which previously crashed, including tests to ensure that SLP won't emit fadd/fmul reductions for bf16 or f16 w/ zvfhmin after #111000.
1 parent 89d2a9d commit 20864d2

File tree

6 files changed

+470
-352
lines changed

6 files changed

+470
-352
lines changed

llvm/include/llvm/CodeGen/ValueTypes.td

Lines changed: 147 additions & 146 deletions
Large diffs are not rendered by default.

llvm/test/Analysis/CostModel/RISCV/arith-fp.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ define void @frem() {
521521
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %BF16 = frem bfloat undef, undef
522522
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef
523523
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef
524-
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1BF16 = frem <1 x bfloat> undef, undef
524+
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1BF16 = frem <1 x bfloat> undef, undef
525525
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2BF16 = frem <2 x bfloat> undef, undef
526526
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4BF16 = frem <4 x bfloat> undef, undef
527527
; CHECK-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8BF16 = frem <8 x bfloat> undef, undef
@@ -761,7 +761,7 @@ define void @fcopysign() {
761761
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.copysign.bf16(bfloat undef, bfloat undef)
762762
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef)
763763
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef)
764-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef)
764+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V1BF16 = call <1 x bfloat> @llvm.copysign.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef)
765765
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef)
766766
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef)
767767
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef)
@@ -889,7 +889,7 @@ define void @fma() {
889889
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %BF16 = call bfloat @llvm.fma.bf16(bfloat undef, bfloat undef, bfloat undef)
890890
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
891891
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
892-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x bfloat> undef)
892+
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V1BF16 = call <1 x bfloat> @llvm.fma.v1bf16(<1 x bfloat> undef, <1 x bfloat> undef, <1 x bfloat> undef)
893893
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2BF16 = call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> undef, <2 x bfloat> undef, <2 x bfloat> undef)
894894
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4BF16 = call <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat> undef, <4 x bfloat> undef, <4 x bfloat> undef)
895895
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8BF16 = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> undef, <8 x bfloat> undef, <8 x bfloat> undef)

llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,41 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
3-
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
4-
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
2+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFH
3+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s --check-prefixes=FP-REDUCE,FP-REDUCE-ZVFHMIN
4+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zfbfmin,+zvfbfmin -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output | FileCheck %s --check-prefix=SIZE
5+
6+
define void @reduce_fadd_bfloat() {
7+
; FP-REDUCE-LABEL: 'reduce_fadd_bfloat'
8+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
9+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
10+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
11+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
12+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
13+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
14+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
15+
; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
16+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
17+
;
18+
; SIZE-LABEL: 'reduce_fadd_bfloat'
19+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
20+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
21+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
22+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
23+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
24+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
25+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
26+
; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
27+
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
28+
;
29+
%V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
30+
%V2 = call fast bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef)
31+
%V4 = call fast bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef)
32+
%V8 = call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef)
33+
%V16 = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef)
34+
%v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef)
35+
%V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef)
36+
%V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef)
37+
ret void
38+
}
539

640
define void @reduce_fadd_half() {
741
; FP-REDUCE-ZVFH-LABEL: 'reduce_fadd_half'
@@ -116,6 +150,40 @@ define void @reduce_fadd_double() {
116150
ret void
117151
}
118152

153+
define void @reduce_oredered_fadd_bfloat() {
154+
; FP-REDUCE-LABEL: 'reduce_oredered_fadd_bfloat'
155+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
156+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
157+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
158+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
159+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
160+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
161+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
162+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
163+
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
164+
;
165+
; SIZE-LABEL: 'reduce_oredered_fadd_bfloat'
166+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
167+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
168+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
169+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR0000, <8 x bfloat> undef)
170+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> undef)
171+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
172+
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
173+
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
174+
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
175+
;
176+
%V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
177+
%V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0.0, <2 x bfloat> undef)
178+
%V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0.0, <4 x bfloat> undef)
179+
%V8 = call bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0.0, <8 x bfloat> undef)
180+
%V16 = call bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0.0, <16 x bfloat> undef)
181+
%v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef)
182+
%V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef)
183+
%V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef)
184+
ret void
185+
}
186+
119187
define void @reduce_oredered_fadd_half() {
120188
; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half'
121189
; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)

0 commit comments

Comments
 (0)