Skip to content

Commit 789ee62

Browse files
committed
Implement F16C intrinsics
1 parent a9788c7 commit 789ee62

File tree

4 files changed

+117
-4
lines changed

4 files changed

+117
-4
lines changed

crates/core_arch/src/lib.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,4 @@ mod core_arch;
7575
pub use self::core_arch::arch::*;
7676

7777
#[allow(unused_imports)]
78-
use core::{ffi, intrinsics, marker, mem, ptr, sync};
79-
80-
#[cfg(test)]
81-
use core::hint;
78+
use core::{ffi, hint, intrinsics, marker, mem, ptr, sync};

crates/core_arch/src/simd.rs

+4
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ simd_ty!(i32x8[i32]:
184184
| x0, x1, x2, x3, x4, x5, x6, x7);
185185
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
186186

187+
simd_ty!(f32x8[f32]:
188+
f32, f32, f32, f32, f32, f32, f32, f32 |
189+
x0, x1, x2, x3, x4, x5, x6, x7);
190+
187191
// 512-bit wide types:
188192

189193
simd_ty!(i32x16[i32]:

crates/core_arch/src/x86/f16c.rs

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
//! F16C intrinsics:
2+
//! https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769
3+
4+
use crate::{
5+
core_arch::{simd::*, x86::*},
6+
hint::unreachable_unchecked,
7+
mem::transmute,
8+
};
9+
10+
#[cfg(test)]
11+
use stdsimd_test::assert_instr;
12+
13+
#[allow(improper_ctypes)]
14+
extern "unadjusted" {
15+
#[link_name = "llvm.x86.vcvtph2ps.128"]
16+
fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
17+
#[link_name = "llvm.x86.vcvtph2ps.256"]
18+
fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
19+
#[link_name = "llvm.x86.vcvtps2ph.128"]
20+
fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
21+
#[link_name = "llvm.x86.vcvtps2ph.256"]
22+
fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
23+
}
24+
25+
#[inline]
26+
#[target_feature(enable = "avx512f")]
27+
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
28+
pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
29+
transmute(llvm_vcvtph2ps_128(transmute(a)))
30+
}
31+
32+
#[inline]
33+
#[target_feature(enable = "avx512f")]
34+
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
35+
pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
36+
transmute(llvm_vcvtph2ps_256(transmute(a)))
37+
}
38+
39+
macro_rules! dispatch_rounding {
40+
($rounding:ident, $call:ident) => {{
41+
const NEAREST: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
42+
const DOWN: i32 = _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
43+
const UP: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
44+
const TRUNCATE: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
45+
const MXCSR: i32 = _MM_FROUND_CUR_DIRECTION;
46+
match $rounding {
47+
NEAREST => call!(NEAREST),
48+
DOWN => call!(DOWN),
49+
UP => call!(UP),
50+
TRUNCATE => call!(TRUNCATE),
51+
MXCSR => call!(MXCSR),
52+
_ => unreachable_unchecked(),
53+
}
54+
}};
55+
}
56+
57+
#[inline]
58+
#[target_feature(enable = "avx512f")]
59+
#[rustc_args_required_const(1)]
60+
#[cfg_attr(test, assert_instr("vcvtps2ph", rounding = 0))]
61+
pub unsafe fn _mm_cvtps_ph(a: __m128, rounding: i32) -> __m128i {
62+
let a = transmute(a);
63+
macro_rules! call {
64+
($rounding:ident) => {
65+
llvm_vcvtps2ph_128(a, $rounding)
66+
};
67+
}
68+
transmute(dispatch_rounding!(rounding, call))
69+
}
70+
71+
#[inline]
72+
#[target_feature(enable = "avx512f")]
73+
#[rustc_args_required_const(1)]
74+
#[cfg_attr(test, assert_instr("vcvtps2ph", rounding = 0))]
75+
pub unsafe fn _mm256_cvtps_ph(a: __m256, rounding: i32) -> __m128i {
76+
let a = transmute(a);
77+
macro_rules! call {
78+
($rounding:ident) => {
79+
llvm_vcvtps2ph_256(a, $rounding)
80+
};
81+
}
82+
transmute(dispatch_rounding!(rounding, call))
83+
}
84+
85+
#[cfg(test)]
86+
mod tests {
87+
use crate::{core_arch::x86::*, mem::transmute};
88+
use stdsimd_test::simd_test;
89+
90+
#[simd_test(enable = "avx512f")]
91+
unsafe fn test_mm_cvtph_ps() {
92+
let array = [1_f32, 2_f32, 3_f32, 4_f32];
93+
let float_vec: __m128 = transmute(array);
94+
let halfs: __m128i = _mm_cvtps_ph(float_vec, 0);
95+
let floats: __m128 = _mm_cvtph_ps(halfs);
96+
let result: [f32; 4] = transmute(floats);
97+
assert_eq!(result, array);
98+
}
99+
100+
#[simd_test(enable = "avx512f")]
101+
unsafe fn test_mm256_cvtph_ps() {
102+
let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32];
103+
let float_vec: __m256 = transmute(array);
104+
let halfs: __m128i = _mm256_cvtps_ph(float_vec, 0);
105+
let floats: __m256 = _mm256_cvtph_ps(halfs);
106+
let result: [f32; 8] = transmute(floats);
107+
assert_eq!(result, array);
108+
}
109+
}

crates/core_arch/src/x86/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -568,3 +568,6 @@ pub use self::bt::*;
568568

569569
mod rtm;
570570
pub use self::rtm::*;
571+
572+
mod f16c;
573+
pub use self::f16c::*;

0 commit comments

Comments
 (0)