Skip to content

Portable vertical floating-point mathematical functions #467

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions coresimd/ppsv/api/float_math.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//! Float math

/// Implements the floating-point math API (`abs`, `sqrt`, `sqrte`, `rsqrte`,
/// `fma`, `sin`, `cos`) as inherent methods on the vector type `$id`.
///
/// `abs`, `sqrt`, `fma`, `sin` and `cos` forward to the matching trait in
/// `coresimd::ppsv::codegen`; `sqrte` and `rsqrte` call `simd_fsqrt` directly.
macro_rules! impl_float_math {
    ($id:ident) => {
        impl $id {
            /// Absolute-value
            #[inline]
            pub fn abs(self) -> Self {
                use coresimd::ppsv::codegen::abs::FloatAbs;
                FloatAbs::abs(self)
            }

            /// Square-root
            #[inline]
            pub fn sqrt(self) -> Self {
                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
                FloatSqrt::sqrt(self)
            }

            /// Square-root estimate
            ///
            /// This implementation calls `simd_fsqrt` on `self`.
            #[inline]
            pub fn sqrte(self) -> Self {
                use coresimd::simd_llvm::simd_fsqrt;
                // SAFETY: NOTE(review) `simd_fsqrt` is presumed to require a
                // floating-point vector argument; this macro is instantiated
                // from `simd_f_ty!`, i.e. for float vector types - confirm
                // against the intrinsic's contract.
                unsafe { simd_fsqrt(self) }
            }

            /// Reciprocal square-root estimate
            ///
            /// This implementation computes `splat(1.) / simd_fsqrt(self)`.
            #[inline]
            pub fn rsqrte(self) -> Self {
                use coresimd::simd_llvm::simd_fsqrt;
                // Only the intrinsic call needs `unsafe`; the division is safe.
                // SAFETY: NOTE(review) same presumed requirement as in
                // `sqrte`: `self` is a float vector - confirm.
                $id::splat(1.) / unsafe { simd_fsqrt(self) }
            }

            /// Fused multiply add: `self * y + z`
            #[inline]
            pub fn fma(self, y: Self, z: Self) -> Self {
                use coresimd::ppsv::codegen::fma::FloatFma;
                FloatFma::fma(self, y, z)
            }

            /// Sin
            // Uses plain `#[inline]` like every other method in this impl;
            // forcing `inline(always)` only for `sin` was inconsistent.
            #[inline]
            pub fn sin(self) -> Self {
                use coresimd::ppsv::codegen::sin::FloatSin;
                FloatSin::sin(self)
            }

            /// Cos
            #[inline]
            pub fn cos(self) -> Self {
                use coresimd::ppsv::codegen::cos::FloatCos;
                FloatCos::cos(self)
            }
        }
    };
}

macro_rules! test_float_math {
($id:ident, $elem_ty:ident) => {

fn sqrt2() -> $elem_ty {
match ::mem::size_of::<$elem_ty>() {
4 => 1.4142135 as $elem_ty,
8 => 1.4142135623730951 as $elem_ty,
_ => unreachable!(),
}
}

fn pi() -> $elem_ty {
match ::mem::size_of::<$elem_ty>() {
4 => ::std::f32::consts::PI as $elem_ty,
8 => ::std::f64::consts::PI as $elem_ty,
_ => unreachable!(),
}
}

#[test]
fn abs() {
use coresimd::simd::*;
let o = $id::splat(1 as $elem_ty);
assert_eq!(o, o.abs());

let mo = $id::splat(-1 as $elem_ty);
assert_eq!(o, mo.abs());
}

#[test]
fn sqrt() {
use coresimd::simd::*;
let z = $id::splat(0 as $elem_ty);
let o = $id::splat(1 as $elem_ty);
assert_eq!(z, z.sqrt());
assert_eq!(o, o.sqrt());

let t = $id::splat(2 as $elem_ty);
let e = $id::splat(sqrt2() as $elem_ty);
assert_eq!(e, t.sqrt());
}

#[test]
fn sqrte() {
use coresimd::simd::*;
let z = $id::splat(0 as $elem_ty);
let o = $id::splat(1 as $elem_ty);
assert_eq!(z, z.sqrte());
assert_eq!(o, o.sqrte());

let t = $id::splat(2 as $elem_ty);
let e = $id::splat(sqrt2() as $elem_ty);
let error = (e - t.sqrte()).abs();
let tol = $id::splat(2.4e-4 as $elem_ty);

assert!(error.le(tol).all());
}

#[test]
fn rsqrte() {
use coresimd::simd::*;
let o = $id::splat(1 as $elem_ty);
assert_eq!(o, o.rsqrte());

let t = $id::splat(2 as $elem_ty);
let e = 1. / sqrt2();
let error = (e - t.rsqrte()).abs();
let tol = $id::splat(2.4e-4 as $elem_ty);
assert!(error.le(tol).all());
}

#[test]
fn fma() {
use coresimd::simd::*;
let z = $id::splat(0 as $elem_ty);
let o = $id::splat(1 as $elem_ty);
let t = $id::splat(2 as $elem_ty);
let t3 = $id::splat(3 as $elem_ty);
let f = $id::splat(4 as $elem_ty);

assert_eq!(z, z.fma(z, z));
assert_eq!(o, o.fma(o, z));
assert_eq!(o, o.fma(z, o));
assert_eq!(o, z.fma(o, o));

assert_eq!(t, o.fma(o, o));
assert_eq!(t, o.fma(t, z));
assert_eq!(t, t.fma(o, z));

assert_eq!(f, t.fma(t, z));
assert_eq!(f, t.fma(o, t));
assert_eq!(t3, t.fma(o, o));
}

#[test]
fn sin() {
use coresimd::simd::*;
let z = $id::splat(0 as $elem_ty);
let p = $id::splat(pi() as $elem_ty);
let ph = $id::splat(pi() as $elem_ty / 2.);
let o_r = $id::splat((pi() as $elem_ty / 2.).sin());
let z_r = $id::splat((pi() as $elem_ty).sin());

assert_eq!(z, z.sin());
assert_eq!(o_r, ph.sin());
assert_eq!(z_r, p.sin());
}

#[test]
fn cos() {
use coresimd::simd::*;
let z = $id::splat(0 as $elem_ty);
let o = $id::splat(1 as $elem_ty);
let p = $id::splat(pi() as $elem_ty);
let ph = $id::splat(pi() as $elem_ty / 2.);
let z_r = $id::splat((pi() as $elem_ty / 2.).cos());
let o_r = $id::splat((pi() as $elem_ty).cos());

assert_eq!(o, z.cos());
assert_eq!(z_r, ph.cos());
assert_eq!(o_r, p.cos());
}
};
}
15 changes: 11 additions & 4 deletions coresimd/ppsv/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ mod default;
#[macro_use]
mod eq;
#[macro_use]
mod float_math;
#[macro_use]
mod fmt;
#[macro_use]
mod from;
Expand Down Expand Up @@ -128,7 +130,8 @@ pub trait Lanes<A> {}

/// Defines a portable packed SIMD floating-point vector type.
macro_rules! simd_f_ty {
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
$test_macro:ident |
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
vector_impl!(
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
Expand All @@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
[impl_neg_op, $id, $elem_ty],
[impl_partial_eq, $id],
[impl_default, $id, $elem_ty],
[impl_float_minmax_ops, $id]
[impl_float_minmax_ops, $id],
[impl_float_math, $id]
);

$test_macro!(
Expand All @@ -160,14 +164,16 @@ macro_rules! simd_f_ty {
test_default!($id, $elem_ty);
test_mask_select!($mask_ty, $id, $elem_ty);
test_float_minmax_ops!($id, $elem_ty);
test_float_math!($id, $elem_ty);
}
);
}
}

/// Defines a portable packed SIMD signed-integer vector type.
macro_rules! simd_i_ty {
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
$test_macro:ident |
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
vector_impl!(
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
Expand Down Expand Up @@ -221,7 +227,8 @@ macro_rules! simd_i_ty {

/// Defines a portable packed SIMD unsigned-integer vector type.
macro_rules! simd_u_ty {
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
$test_macro:ident |
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
vector_impl!(
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
Expand Down
43 changes: 43 additions & 0 deletions coresimd/ppsv/codegen/abs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//! Vector absolute value

use coresimd::simd::*;

// Foreign declarations bound through `link_name` to the `llvm.fabs.*`
// vector intrinsics, one for each float vector type listed below.
// NOTE(review): `improper_ctypes` is presumably allowed because the lint does
// not treat the portable vector types as FFI-safe - confirm.
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.fabs.v2f32"]
fn abs_v2f32(x: f32x2) -> f32x2;
#[link_name = "llvm.fabs.v4f32"]
fn abs_v4f32(x: f32x4) -> f32x4;
#[link_name = "llvm.fabs.v8f32"]
fn abs_v8f32(x: f32x8) -> f32x8;
#[link_name = "llvm.fabs.v16f32"]
fn abs_v16f32(x: f32x16) -> f32x16;
#[link_name = "llvm.fabs.v2f64"]
fn abs_v2f64(x: f64x2) -> f64x2;
#[link_name = "llvm.fabs.v4f64"]
fn abs_v4f64(x: f64x4) -> f64x4;
#[link_name = "llvm.fabs.v8f64"]
fn abs_v8f64(x: f64x8) -> f64x8;
}

/// Crate-internal absolute-value operation for float vector types.
///
/// The implementations in this file forward to the `llvm.fabs.*`
/// declarations.
pub(crate) trait FloatAbs {
/// Returns the absolute value of `self`.
fn abs(self) -> Self;
}

/// Implements `FloatAbs` for the vector type `$id` by forwarding to the
/// foreign function `$intrinsic`.
macro_rules! impl_fabs {
    ($id:ident: $intrinsic:ident) => {
        impl FloatAbs for $id {
            // `#[inline]` makes this forwarding method available for inlining
            // in other crates, so the public `#[inline]` wrapper that calls it
            // can collapse down to the intrinsic call.
            #[inline]
            fn abs(self) -> Self {
                // SAFETY: NOTE(review) `$intrinsic` is expected to be one of
                // the `llvm.fabs.*` declarations whose signature takes and
                // returns `$id` by value - confirm at each invocation.
                unsafe { $intrinsic(self) }
            }
        }
    };
}

// Implement `FloatAbs` for each float vector type, pairing it with the
// `llvm.fabs.*` declaration of the same lane count and element width.
impl_fabs!(f32x2: abs_v2f32);
impl_fabs!(f32x4: abs_v4f32);
impl_fabs!(f32x8: abs_v8f32);
impl_fabs!(f32x16: abs_v16f32);
impl_fabs!(f64x2: abs_v2f64);
impl_fabs!(f64x4: abs_v4f64);
impl_fabs!(f64x8: abs_v8f64);
43 changes: 43 additions & 0 deletions coresimd/ppsv/codegen/cos.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//! Exact vector cos

use coresimd::simd::*;

// Foreign declarations bound through `link_name` to the `llvm.cos.*`
// vector intrinsics, one for each float vector type listed below.
// NOTE(review): `improper_ctypes` is presumably allowed because the lint does
// not treat the portable vector types as FFI-safe - confirm.
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.cos.v2f32"]
fn cos_v2f32(x: f32x2) -> f32x2;
#[link_name = "llvm.cos.v4f32"]
fn cos_v4f32(x: f32x4) -> f32x4;
#[link_name = "llvm.cos.v8f32"]
fn cos_v8f32(x: f32x8) -> f32x8;
#[link_name = "llvm.cos.v16f32"]
fn cos_v16f32(x: f32x16) -> f32x16;
#[link_name = "llvm.cos.v2f64"]
fn cos_v2f64(x: f64x2) -> f64x2;
#[link_name = "llvm.cos.v4f64"]
fn cos_v4f64(x: f64x4) -> f64x4;
#[link_name = "llvm.cos.v8f64"]
fn cos_v8f64(x: f64x8) -> f64x8;
}

/// Crate-internal cosine operation for float vector types.
///
/// The implementations in this file forward to the `llvm.cos.*`
/// declarations.
pub(crate) trait FloatCos {
/// Returns the cosine of `self`.
fn cos(self) -> Self;
}

/// Implements `FloatCos` for the vector type `$id` by forwarding to the
/// foreign function `$intrinsic`.
macro_rules! impl_fcos {
    ($id:ident: $intrinsic:ident) => {
        impl FloatCos for $id {
            // `#[inline]` makes this forwarding method available for inlining
            // in other crates, so the public `#[inline]` wrapper that calls it
            // can collapse down to the intrinsic call.
            #[inline]
            fn cos(self) -> Self {
                // SAFETY: NOTE(review) `$intrinsic` is expected to be one of
                // the `llvm.cos.*` declarations whose signature takes and
                // returns `$id` by value - confirm at each invocation.
                unsafe { $intrinsic(self) }
            }
        }
    };
}

// Implement `FloatCos` for each float vector type, pairing it with the
// `llvm.cos.*` declaration of the same lane count and element width.
impl_fcos!(f32x2: cos_v2f32);
impl_fcos!(f32x4: cos_v4f32);
impl_fcos!(f32x8: cos_v8f32);
impl_fcos!(f32x16: cos_v16f32);
impl_fcos!(f64x2: cos_v2f64);
impl_fcos!(f64x4: cos_v4f64);
impl_fcos!(f64x8: cos_v8f64);
43 changes: 43 additions & 0 deletions coresimd/ppsv/codegen/fma.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//! Vector fused multiply add

use coresimd::simd::*;

// Foreign declarations bound through `link_name` to the `llvm.fma.*`
// vector intrinsics, one for each float vector type listed below. Each takes
// three operands of the same vector type.
// NOTE(review): `improper_ctypes` is presumably allowed because the lint does
// not treat the portable vector types as FFI-safe - confirm.
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.fma.v2f32"]
fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
#[link_name = "llvm.fma.v4f32"]
fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
#[link_name = "llvm.fma.v8f32"]
fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
#[link_name = "llvm.fma.v16f32"]
fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
#[link_name = "llvm.fma.v2f64"]
fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
#[link_name = "llvm.fma.v4f64"]
fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
#[link_name = "llvm.fma.v8f64"]
fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
}

/// Crate-internal fused multiply-add operation for float vector types.
///
/// The implementations in this file forward to the `llvm.fma.*`
/// declarations.
pub(crate) trait FloatFma {
/// Fused multiply-add of `self`, `y` and `z`; the public wrapper documents
/// the result as `self * y + z`.
fn fma(self, y: Self, z: Self) -> Self;
}

/// Implements `FloatFma` for the vector type `$id` by forwarding all three
/// operands, in order, to the foreign function `$intrinsic`.
macro_rules! impl_fma {
    ($id:ident: $intrinsic:ident) => {
        impl FloatFma for $id {
            // `#[inline]` makes this forwarding method available for inlining
            // in other crates, so the public `#[inline]` wrapper that calls it
            // can collapse down to the intrinsic call.
            #[inline]
            fn fma(self, y: Self, z: Self) -> Self {
                // SAFETY: NOTE(review) `$intrinsic` is expected to be one of
                // the `llvm.fma.*` declarations whose signature takes three
                // `$id` values and returns `$id` - confirm at each invocation.
                unsafe { $intrinsic(self, y, z) }
            }
        }
    };
}

// Implement `FloatFma` for each float vector type, pairing it with the
// `llvm.fma.*` declaration of the same lane count and element width.
impl_fma!(f32x2: fma_v2f32);
impl_fma!(f32x4: fma_v4f32);
impl_fma!(f32x8: fma_v8f32);
impl_fma!(f32x16: fma_v16f32);
impl_fma!(f64x2: fma_v2f64);
impl_fma!(f64x4: fma_v4f64);
impl_fma!(f64x8: fma_v8f64);
6 changes: 6 additions & 0 deletions coresimd/ppsv/codegen/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,9 @@
pub mod wrapping;

pub mod masks_reductions;

pub mod sqrt;
pub mod abs;
pub mod fma;
pub mod sin;
pub mod cos;
Loading