Skip to content

Commit 175b45d

Browse files
committed
Add benchmarks for floating point math
This adds comparisons among the compiler-builtins functions, system functions if available, and optionally handwritten assembly. These also help us identify inconsistencies between this crate and system functions, which may otherwise go unnoticed if intrinsics get lowered to inline operations rather than library calls.
1 parent 841bdaf commit 175b45d

14 files changed

+1730
-2
lines changed

ci/run.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ set -eux
44

55
target="${1:-}"
66

7-
if [ -z "${1:-}" ]; then
7+
export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
8+
9+
if [ -z "$target" ]; then
810
host_target=$(rustc -vV | awk '/^host/ { print $2 }')
911
echo "Defaulted to host target $host_target"
1012
target="$host_target"
@@ -24,6 +26,7 @@ else
2426
run="cargo test --manifest-path testcrate/Cargo.toml --no-fail-fast --target $target"
2527
$run
2628
$run --release
29+
$run --benches
2730
$run --features c
2831
$run --features c --release
2932
$run --features no-asm

testcrate/Cargo.toml

+44-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ path = ".."
2121
default-features = false
2222
features = ["public-test-deps"]
2323

24+
[dev-dependencies]
25+
criterion = { version = "0.5.1", default-features = false }
26+
paste = "1.0.15"
27+
2428
[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
2529
test = { git = "https://github.com/japaric/utest" }
2630
utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
@@ -34,6 +38,45 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"]
3438
mem = ["compiler_builtins/mem"]
3539
mangled-names = ["compiler_builtins/mangled-names"]
3640
# Skip tests that rely on f128 symbols being available on the system
37-
no-sys-f128 = ["no-sys-f128-int-convert"]
41+
no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
3842
# Some platforms provide f128 functions for everything except integer conversions
3943
no-sys-f128-int-convert = []
44+
no-sys-f16-f128-convert = []
45+
# Skip tests that rely on f16 symbols being available on the system
46+
no-sys-f16 = []
47+
48+
[[bench]]
49+
name = "float_add"
50+
harness = false
51+
52+
[[bench]]
53+
name = "float_sub"
54+
harness = false
55+
56+
[[bench]]
57+
name = "float_mul"
58+
harness = false
59+
60+
[[bench]]
61+
name = "float_div"
62+
harness = false
63+
64+
[[bench]]
65+
name = "float_cmp"
66+
harness = false
67+
68+
[[bench]]
69+
name = "float_conv"
70+
harness = false
71+
72+
[[bench]]
73+
name = "float_extend"
74+
harness = false
75+
76+
[[bench]]
77+
name = "float_trunc"
78+
harness = false
79+
80+
[[bench]]
81+
name = "float_pow"
82+
harness = false

testcrate/benches/float_add.rs

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#![feature(f128)]
2+
3+
use compiler_builtins::float::add;
4+
use criterion::{criterion_group, criterion_main, Criterion};
5+
use testcrate::float_bench;
6+
7+
float_bench! {
8+
name: add_f32,
9+
sig: (a: f32, b: f32) -> f32,
10+
crate_fn: add::__addsf3,
11+
sys_fn: __addsf3,
12+
sys_available: all(),
13+
asm: [
14+
#[cfg(target_arch = "x86_64")] {
15+
asm!(
16+
"addss {a}, {b}",
17+
a = inout(xmm_reg) a,
18+
b = in(xmm_reg) b,
19+
options(nomem, nostack)
20+
);
21+
22+
a
23+
};
24+
25+
#[cfg(target_arch = "aarch64")] {
26+
asm!(
27+
"fadd {a:s}, {a:s}, {b:s}",
28+
a = inout(vreg) a,
29+
b = in(vreg) b,
30+
options(nomem, nostack)
31+
);
32+
33+
a
34+
};
35+
],
36+
}
37+
38+
float_bench! {
39+
name: add_f64,
40+
sig: (a: f64, b: f64) -> f64,
41+
crate_fn: add::__adddf3,
42+
sys_fn: __adddf3,
43+
sys_available: all(),
44+
asm: [
45+
#[cfg(target_arch = "x86_64")] {
46+
asm!(
47+
"addsd {a}, {b}",
48+
a = inout(xmm_reg) a,
49+
b = in(xmm_reg) b,
50+
options(nomem, nostack)
51+
);
52+
53+
a
54+
};
55+
56+
#[cfg(target_arch = "aarch64")] {
57+
asm!(
58+
"fadd {a:d}, {a:d}, {b:d}",
59+
a = inout(vreg) a,
60+
b = in(vreg) b,
61+
options(nomem, nostack)
62+
);
63+
64+
a
65+
};
66+
],
67+
}
68+
69+
float_bench! {
70+
name: add_f128,
71+
sig: (a: f128, b: f128) -> f128,
72+
crate_fn: add::__addtf3,
73+
crate_fn_ppc: add::__addkf3,
74+
sys_fn: __addtf3,
75+
sys_fn_ppc: __addkf3,
76+
sys_available: not(feature = "no-sys-f128"),
77+
asm: []
78+
}
79+
80+
criterion_group!(float_add, add_f32, add_f64, add_f128);
81+
criterion_main!(float_add);

testcrate/benches/float_cmp.rs

+202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#![feature(f128)]
2+
3+
use criterion::{criterion_group, criterion_main, Criterion};
4+
use testcrate::float_bench;
5+
6+
use compiler_builtins::float::cmp;
7+
8+
/// Reports whether two results from a `gt` comparison symbol are equivalent.
///
/// `gt` symbols are allowed to return differing values; the results just get
/// compared to 0. Two results therefore count as equal when both are greater
/// than zero or both are less than or equal to zero.
fn gt_res_eq(a: i32, b: i32) -> bool {
    // Only the "greater than zero" classification matters, not the exact value.
    (a > 0) == (b > 0)
}
15+
16+
float_bench! {
17+
name: cmp_f32_gt,
18+
sig: (a: f32, b: f32) -> i32,
19+
crate_fn: cmp::__gtsf2,
20+
sys_fn: __gtsf2,
21+
sys_available: all(),
22+
output_eq: gt_res_eq,
23+
asm: [
24+
#[cfg(target_arch = "x86_64")] {
25+
let ret: i32;
26+
asm!(
27+
"xor {ret:e}, {ret:e}",
28+
"ucomiss {a}, {b}",
29+
"seta {ret:l}",
30+
a = in(xmm_reg) a,
31+
b = in(xmm_reg) b,
32+
ret = out(reg) ret,
33+
options(nomem, nostack)
34+
);
35+
36+
ret
37+
};
38+
39+
#[cfg(target_arch = "aarch64")] {
40+
let ret: i32;
41+
asm!(
42+
"fcmp {a:s}, {b:s}",
43+
"cset {ret:w}, gt",
44+
a = in(vreg) a,
45+
b = in(vreg) b,
46+
ret = out(reg) ret,
47+
options(nomem,nostack),
48+
);
49+
50+
ret
51+
};
52+
],
53+
}
54+
55+
float_bench! {
56+
name: cmp_f32_unord,
57+
sig: (a: f32, b: f32) -> i32,
58+
crate_fn: cmp::__unordsf2,
59+
sys_fn: __unordsf2,
60+
sys_available: all(),
61+
asm: [
62+
#[cfg(target_arch = "x86_64")] {
63+
let ret: i32;
64+
asm!(
65+
"xor {ret:e}, {ret:e}",
66+
"ucomiss {a}, {b}",
67+
"setp {ret:l}",
68+
a = in(xmm_reg) a,
69+
b = in(xmm_reg) b,
70+
ret = out(reg) ret,
71+
options(nomem, nostack)
72+
);
73+
74+
ret
75+
};
76+
77+
#[cfg(target_arch = "aarch64")] {
78+
let ret: i32;
79+
asm!(
80+
"fcmp {a:s}, {b:s}",
81+
"cset {ret:w}, vs",
82+
a = in(vreg) a,
83+
b = in(vreg) b,
84+
ret = out(reg) ret,
85+
options(nomem, nostack)
86+
);
87+
88+
ret
89+
};
90+
],
91+
}
92+
93+
float_bench! {
94+
name: cmp_f64_gt,
95+
sig: (a: f64, b: f64) -> i32,
96+
crate_fn: cmp::__gtdf2,
97+
sys_fn: __gtdf2,
98+
sys_available: all(),
99+
output_eq: gt_res_eq,
100+
asm: [
101+
#[cfg(target_arch = "x86_64")] {
102+
let ret: i32;
103+
asm!(
104+
"xor {ret:e}, {ret:e}",
105+
"ucomisd {a}, {b}",
106+
"seta {ret:l}",
107+
a = in(xmm_reg) a,
108+
b = in(xmm_reg) b,
109+
ret = out(reg) ret,
110+
options(nomem, nostack)
111+
);
112+
113+
ret
114+
};
115+
116+
#[cfg(target_arch = "aarch64")] {
117+
let ret: i32;
118+
asm!(
119+
"fcmp {a:d}, {b:d}",
120+
"cset {ret:w}, gt",
121+
a = in(vreg) a,
122+
b = in(vreg) b,
123+
ret = out(reg) ret,
124+
options(nomem, nostack)
125+
);
126+
127+
ret
128+
};
129+
],
130+
}
131+
132+
float_bench! {
133+
name: cmp_f64_unord,
134+
sig: (a: f64, b: f64) -> i32,
135+
crate_fn: cmp::__unorddf2,
136+
sys_fn: __unorddf2,
137+
sys_available: all(),
138+
asm: [
139+
#[cfg(target_arch = "x86_64")] {
140+
let ret: i32;
141+
asm!(
142+
"xor {ret:e}, {ret:e}",
143+
"ucomisd {a}, {b}",
144+
"setp {ret:l}",
145+
a = in(xmm_reg) a,
146+
b = in(xmm_reg) b,
147+
ret = out(reg) ret,
148+
options(nomem, nostack)
149+
);
150+
151+
ret
152+
};
153+
154+
#[cfg(target_arch = "aarch64")] {
155+
let ret: i32;
156+
asm!(
157+
"fcmp {a:d}, {b:d}",
158+
"cset {ret:w}, vs",
159+
a = in(vreg) a,
160+
b = in(vreg) b,
161+
ret = out(reg) ret,
162+
options(nomem, nostack)
163+
);
164+
165+
ret
166+
};
167+
],
168+
}
169+
170+
float_bench! {
171+
name: cmp_f128_gt,
172+
sig: (a: f128, b: f128) -> i32,
173+
crate_fn: cmp::__gttf2,
174+
crate_fn_ppc: cmp::__gtkf2,
175+
sys_fn: __gttf2,
176+
sys_fn_ppc: __gtkf2,
177+
sys_available: not(feature = "no-sys-f128"),
178+
output_eq: gt_res_eq,
179+
asm: []
180+
}
181+
182+
float_bench! {
183+
name: cmp_f128_unord,
184+
sig: (a: f128, b: f128) -> i32,
185+
crate_fn: cmp::__unordtf2,
186+
crate_fn_ppc: cmp::__unordkf2,
187+
sys_fn: __unordtf2,
188+
sys_fn_ppc: __unordkf2,
189+
sys_available: not(feature = "no-sys-f128"),
190+
asm: []
191+
}
192+
193+
criterion_group!(
194+
float_cmp,
195+
cmp_f32_gt,
196+
cmp_f32_unord,
197+
cmp_f64_gt,
198+
cmp_f64_unord,
199+
cmp_f128_gt,
200+
cmp_f128_unord
201+
);
202+
criterion_main!(float_cmp);

0 commit comments

Comments
 (0)