Skip to content

Commit 909e278

Browse files
committed
add vertical float math: abs, sqrt, sqrte, rsqrte, fma
1 parent 147fee4 commit 909e278

File tree

7 files changed

+281
-6
lines changed

7 files changed

+281
-6
lines changed

coresimd/ppsv/api/float_math.rs

+132
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,132 @@
1+
//! Float math
2+
3+
/// Implements the floating-point math methods `abs`, `sqrt`, `sqrte`,
/// `rsqrte` and `fma` as inherent methods of the vector type `$id`.
macro_rules! impl_float_math {
4+
($id:ident) => {
5+
impl $id {
6+
/// Absolute-value
///
/// Forwards to the `FloatAbs` codegen trait implemented for this type.
7+
#[inline]
8+
pub fn abs(self) -> Self {
9+
use coresimd::ppsv::codegen::abs::FloatAbs;
10+
FloatAbs::abs(self)
11+
}
12+
13+
/// Square-root
///
/// Forwards to the `FloatSqrt` codegen trait implemented for this type.
14+
#[inline]
15+
pub fn sqrt(self) -> Self {
16+
use coresimd::ppsv::codegen::sqrt::FloatSqrt;
17+
FloatSqrt::sqrt(self)
18+
}
19+
20+
/// Square-root estimate
///
/// Currently computed with the generic `simd_fsqrt` platform intrinsic.
/// NOTE(review): no precision bound is stated here - confirm what
/// accuracy callers may rely on.
21+
#[inline]
22+
pub fn sqrte(self) -> Self {
23+
use coresimd::simd_llvm::simd_fsqrt;
24+
// `simd_fsqrt` is declared in an `extern "platform-intrinsic"` block,
// so calling it requires `unsafe`.
unsafe { simd_fsqrt(self) }
25+
}
26+
27+
/// Reciprocal square-root estimate
///
/// Currently computed as a vector of ones divided by the result of the
/// `simd_fsqrt` platform intrinsic.
28+
#[inline]
29+
pub fn rsqrte(self) -> Self {
30+
// `simd_fsqrt` is declared in an `extern "platform-intrinsic"` block,
// so calling it requires `unsafe`.
unsafe {
31+
use coresimd::simd_llvm::simd_fsqrt;
32+
$id::splat(1.) / simd_fsqrt(self)
33+
}
34+
}
35+
36+
/// Fused multiply add: `self * y + z`
///
/// Forwards to the `FloatFma` codegen trait implemented for this type.
37+
#[inline]
38+
pub fn fma(self, y: Self, z: Self) -> Self {
39+
use coresimd::ppsv::codegen::fma::FloatFma;
40+
FloatFma::fma(self, y, z)
41+
}
42+
}
43+
};
44+
}
45+
46+
/// Generates the unit tests for the methods produced by `impl_float_math`
/// for the vector type `$id` with element type `$elem_ty`.
macro_rules! test_float_math {
47+
($id:ident, $elem_ty:ident) => {
48+
49+
/// Returns the square root of two as an `$elem_ty`, picking the literal
/// by the element size in bytes (4 or 8); any other size is unreachable.
fn sqrt2() -> $elem_ty {
50+
match ::mem::size_of::<$elem_ty>() {
51+
4 => 1.4142135 as $elem_ty,
52+
8 => 1.4142135623730951 as $elem_ty,
53+
_ => unreachable!(),
54+
}
55+
}
56+
57+
#[test]
58+
fn abs() {
59+
use coresimd::simd::*;
60+
// |1| == 1
let o = $id::splat(1 as $elem_ty);
61+
assert_eq!(o, o.abs());
62+
63+
// |-1| == 1
let mo = $id::splat(-1 as $elem_ty);
64+
assert_eq!(o, mo.abs());
65+
}
66+
67+
#[test]
68+
fn sqrt() {
69+
use coresimd::simd::*;
70+
let z = $id::splat(0 as $elem_ty);
71+
let o = $id::splat(1 as $elem_ty);
72+
assert_eq!(z, z.sqrt());
73+
assert_eq!(o, o.sqrt());
74+
75+
// sqrt(2) is compared for exact equality against the literal from `sqrt2()`.
let t = $id::splat(2 as $elem_ty);
76+
let e = $id::splat(sqrt2() as $elem_ty);
77+
assert_eq!(e, t.sqrt());
78+
}
79+
80+
#[test]
81+
fn sqrte() {
82+
use coresimd::simd::*;
83+
let z = $id::splat(0 as $elem_ty);
84+
let o = $id::splat(1 as $elem_ty);
85+
assert_eq!(z, z.sqrte());
86+
assert_eq!(o, o.sqrte());
87+
88+
// The estimate of sqrt(2) only has to be within `tol` of the reference value.
let t = $id::splat(2 as $elem_ty);
89+
let e = $id::splat(sqrt2() as $elem_ty);
90+
let error = (e - t.sqrte()).abs();
91+
let tol = $id::splat(2.4e-4 as $elem_ty);
92+
93+
assert!(error.le(tol).all());
94+
}
95+
96+
#[test]
97+
fn rsqrte() {
98+
use coresimd::simd::*;
99+
let o = $id::splat(1 as $elem_ty);
100+
assert_eq!(o, o.rsqrte());
101+
102+
// The estimate of 1/sqrt(2) only has to be within `tol` of the reference value.
let t = $id::splat(2 as $elem_ty);
103+
let e = 1. / sqrt2();
104+
let error = (e - t.rsqrte()).abs();
105+
let tol = $id::splat(2.4e-4 as $elem_ty);
106+
assert!(error.le(tol).all());
107+
}
108+
109+
#[test]
110+
fn fma() {
111+
use coresimd::simd::*;
112+
// `x.fma(y, z)` computes `x * y + z`; the splatted constants below are
// 0, 1, 2, 3 and 4.
let z = $id::splat(0 as $elem_ty);
113+
let o = $id::splat(1 as $elem_ty);
114+
let t = $id::splat(2 as $elem_ty);
115+
let t3 = $id::splat(3 as $elem_ty);
116+
let f = $id::splat(4 as $elem_ty);
117+
118+
assert_eq!(z, z.fma(z, z));
119+
assert_eq!(o, o.fma(o, z));
120+
assert_eq!(o, o.fma(z, o));
121+
assert_eq!(o, z.fma(o, o));
122+
123+
assert_eq!(t, o.fma(o, o));
124+
assert_eq!(t, o.fma(t, z));
125+
assert_eq!(t, t.fma(o, z));
126+
127+
assert_eq!(f, t.fma(t, z));
128+
assert_eq!(f, t.fma(o, t));
129+
// 2 * 1 + 1 == 3 (the previous arguments `(t, o)` evaluate to 2 * 2 + 1 == 5).
assert_eq!(t3, t.fma(o, o));
130+
}
131+
};
132+
}

coresimd/ppsv/api/mod.rs

+11-4
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,8 @@ mod default;
8484
#[macro_use]
8585
mod eq;
8686
#[macro_use]
87+
mod float_math;
88+
#[macro_use]
8789
mod fmt;
8890
#[macro_use]
8991
mod from;
@@ -128,7 +130,8 @@ pub trait Lanes<A> {}
128130

129131
/// Defines a portable packed SIMD floating-point vector type.
130132
macro_rules! simd_f_ty {
131-
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
133+
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
134+
$test_macro:ident |
132135
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
133136
vector_impl!(
134137
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
142145
[impl_neg_op, $id, $elem_ty],
143146
[impl_partial_eq, $id],
144147
[impl_default, $id, $elem_ty],
145-
[impl_float_minmax_ops, $id]
148+
[impl_float_minmax_ops, $id],
149+
[impl_float_math, $id]
146150
);
147151

148152
$test_macro!(
@@ -160,14 +164,16 @@ macro_rules! simd_f_ty {
160164
test_default!($id, $elem_ty);
161165
test_mask_select!($mask_ty, $id, $elem_ty);
162166
test_float_minmax_ops!($id, $elem_ty);
167+
test_float_math!($id, $elem_ty);
163168
}
164169
);
165170
}
166171
}
167172

168173
/// Defines a portable packed SIMD signed-integer vector type.
169174
macro_rules! simd_i_ty {
170-
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
175+
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
176+
$test_macro:ident |
171177
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
172178
vector_impl!(
173179
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -221,7 +227,8 @@ macro_rules! simd_i_ty {
221227

222228
/// Defines a portable packed SIMD unsigned-integer vector type.
223229
macro_rules! simd_u_ty {
224-
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
230+
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
231+
$test_macro:ident |
225232
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
226233
vector_impl!(
227234
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],

coresimd/ppsv/codegen/abs.rs

+43
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
//! Vector absolute value
2+
3+
use coresimd::simd::*;
4+
5+
// Foreign declarations bound through `#[link_name]` to the LLVM
// `llvm.fabs.*` intrinsics, one per supported float vector type.
// NOTE(review): `improper_ctypes` is presumably allowed because the vector
// types trip that lint in `extern "C"` signatures - confirm.
#[allow(improper_ctypes)]
extern "C" {
7+
#[link_name = "llvm.fabs.v2f32"]
8+
fn abs_v2f32(x: f32x2) -> f32x2;
9+
#[link_name = "llvm.fabs.v4f32"]
10+
fn abs_v4f32(x: f32x4) -> f32x4;
11+
#[link_name = "llvm.fabs.v8f32"]
12+
fn abs_v8f32(x: f32x8) -> f32x8;
13+
#[link_name = "llvm.fabs.v16f32"]
14+
fn abs_v16f32(x: f32x16) -> f32x16;
15+
#[link_name = "llvm.fabs.v2f64"]
16+
fn abs_v2f64(x: f64x2) -> f64x2;
17+
#[link_name = "llvm.fabs.v4f64"]
18+
fn abs_v4f64(x: f64x4) -> f64x4;
19+
#[link_name = "llvm.fabs.v8f64"]
20+
fn abs_v8f64(x: f64x8) -> f64x8;
21+
}
22+
23+
/// Crate-internal absolute-value operation for float vector types.
pub(crate) trait FloatAbs {
24+
/// Returns the absolute value of `self`.
fn abs(self) -> Self;
25+
}
26+
27+
/// Implements `FloatAbs` for the vector type `$id` by calling the foreign
/// function `$fn`.
macro_rules! impl_fabs {
28+
($id:ident: $fn:ident) => {
29+
impl FloatAbs for $id {
30+
fn abs(self) -> Self {
31+
// `$fn` is invoked with the foreign functions from the `extern "C"`
// block in this file, hence the `unsafe` call.
unsafe { $fn(self) }
32+
}
33+
}
34+
}
35+
}
36+
37+
// One implementation per float vector type, each paired with the foreign
// function declared for that type.
impl_fabs!(f32x2: abs_v2f32);
38+
impl_fabs!(f32x4: abs_v4f32);
39+
impl_fabs!(f32x8: abs_v8f32);
40+
impl_fabs!(f32x16: abs_v16f32);
41+
impl_fabs!(f64x2: abs_v2f64);
42+
impl_fabs!(f64x4: abs_v4f64);
43+
impl_fabs!(f64x8: abs_v8f64);

coresimd/ppsv/codegen/fma.rs

+43
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
//! Vector fused multiply add
2+
3+
use coresimd::simd::*;
4+
5+
// Foreign declarations bound through `#[link_name]` to the LLVM
// `llvm.fma.*` intrinsics, one per supported float vector type.
// NOTE(review): `improper_ctypes` is presumably allowed because the vector
// types trip that lint in `extern "C"` signatures - confirm.
#[allow(improper_ctypes)]
extern "C" {
7+
#[link_name = "llvm.fma.v2f32"]
8+
fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
9+
#[link_name = "llvm.fma.v4f32"]
10+
fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
11+
#[link_name = "llvm.fma.v8f32"]
12+
fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
13+
#[link_name = "llvm.fma.v16f32"]
14+
fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
15+
#[link_name = "llvm.fma.v2f64"]
16+
fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
17+
#[link_name = "llvm.fma.v4f64"]
18+
fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
19+
#[link_name = "llvm.fma.v8f64"]
20+
fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
21+
}
22+
23+
/// Crate-internal fused multiply-add operation for float vector types.
pub(crate) trait FloatFma {
24+
/// Fused multiply-add of `self`, `y` and `z`; the public `fma` method
/// that forwards here documents the result as `self * y + z`.
fn fma(self, y: Self, z: Self) -> Self;
25+
}
26+
27+
/// Implements `FloatFma` for the vector type `$id` by calling the foreign
/// function `$fn` with `(self, y, z)`.
macro_rules! impl_fma {
28+
($id:ident: $fn:ident) => {
29+
impl FloatFma for $id {
30+
fn fma(self, y: Self, z: Self) -> Self {
31+
// `$fn` is invoked with the foreign functions from the `extern "C"`
// block in this file, hence the `unsafe` call.
unsafe { $fn(self, y, z) }
32+
}
33+
}
34+
}
35+
}
36+
37+
// One implementation per float vector type, each paired with the foreign
// function declared for that type.
impl_fma!(f32x2: fma_v2f32);
38+
impl_fma!(f32x4: fma_v4f32);
39+
impl_fma!(f32x8: fma_v8f32);
40+
impl_fma!(f32x16: fma_v16f32);
41+
impl_fma!(f64x2: fma_v2f64);
42+
impl_fma!(f64x4: fma_v4f64);
43+
impl_fma!(f64x8: fma_v8f64);

coresimd/ppsv/codegen/mod.rs

+4
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,7 @@
44
pub mod wrapping;
55

66
pub mod masks_reductions;
7+
8+
pub mod sqrt;
9+
pub mod abs;
10+
pub mod fma;

coresimd/ppsv/codegen/sqrt.rs

+43
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
//! Exact vector square-root
2+
3+
use coresimd::simd::*;
4+
5+
// Foreign declarations bound through `#[link_name]` to the LLVM
// `llvm.sqrt.*` intrinsics, one per supported float vector type.
// NOTE(review): `improper_ctypes` is presumably allowed because the vector
// types trip that lint in `extern "C"` signatures - confirm.
#[allow(improper_ctypes)]
extern "C" {
7+
#[link_name = "llvm.sqrt.v2f32"]
8+
fn sqrt_v2f32(x: f32x2) -> f32x2;
9+
#[link_name = "llvm.sqrt.v4f32"]
10+
fn sqrt_v4f32(x: f32x4) -> f32x4;
11+
#[link_name = "llvm.sqrt.v8f32"]
12+
fn sqrt_v8f32(x: f32x8) -> f32x8;
13+
#[link_name = "llvm.sqrt.v16f32"]
14+
fn sqrt_v16f32(x: f32x16) -> f32x16;
15+
#[link_name = "llvm.sqrt.v2f64"]
16+
fn sqrt_v2f64(x: f64x2) -> f64x2;
17+
#[link_name = "llvm.sqrt.v4f64"]
18+
fn sqrt_v4f64(x: f64x4) -> f64x4;
19+
#[link_name = "llvm.sqrt.v8f64"]
20+
fn sqrt_v8f64(x: f64x8) -> f64x8;
21+
}
22+
23+
/// Crate-internal square-root operation for float vector types.
pub(crate) trait FloatSqrt {
24+
/// Returns the square root of `self`.
fn sqrt(self) -> Self;
25+
}
26+
27+
/// Implements `FloatSqrt` for the vector type `$id` by calling the foreign
/// function `$fn`.
macro_rules! impl_fsqrt {
28+
($id:ident: $fn:ident) => {
29+
impl FloatSqrt for $id {
30+
fn sqrt(self) -> Self {
31+
// `$fn` is invoked with the foreign functions from the `extern "C"`
// block in this file, hence the `unsafe` call.
unsafe { $fn(self) }
32+
}
33+
}
34+
}
35+
}
36+
37+
// One implementation per float vector type, each paired with the foreign
// function declared for that type.
impl_fsqrt!(f32x2: sqrt_v2f32);
38+
impl_fsqrt!(f32x4: sqrt_v4f32);
39+
impl_fsqrt!(f32x8: sqrt_v8f32);
40+
impl_fsqrt!(f32x16: sqrt_v16f32);
41+
impl_fsqrt!(f64x2: sqrt_v2f64);
42+
impl_fsqrt!(f64x4: sqrt_v4f64);
43+
impl_fsqrt!(f64x8: sqrt_v8f64);

coresimd/simd_llvm.rs

+5-2
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,9 @@ extern "platform-intrinsic" {
4949
pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
5050

5151
pub fn simd_fmin<T>(a: T, b: T) -> T;
52-
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
53-
// pub fn simd_fmax<T>(a: T, b: T) -> T;
52+
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
53+
// pub fn simd_fmax<T>(a: T, b: T) -> T;
54+
55+
pub fn simd_fsqrt<T>(a: T) -> T;
56+
pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
5457
}

0 commit comments

Comments
 (0)