|
| 1 | +//! F16C intrinsics: |
| 2 | +//! https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769 |
| 3 | +
|
| 4 | +use crate::{ |
| 5 | + core_arch::{simd::*, x86::*}, |
| 6 | + hint::unreachable_unchecked, |
| 7 | + mem::transmute, |
| 8 | +}; |
| 9 | + |
| 10 | +#[cfg(test)] |
| 11 | +use stdsimd_test::assert_instr; |
| 12 | + |
| 13 | +#[allow(improper_ctypes)] |
| 14 | +extern "unadjusted" { |
| 15 | + #[link_name = "llvm.x86.vcvtph2ps.128"] |
| 16 | + fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4; |
| 17 | + #[link_name = "llvm.x86.vcvtph2ps.256"] |
| 18 | + fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8; |
| 19 | + #[link_name = "llvm.x86.vcvtps2ph.128"] |
| 20 | + fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8; |
| 21 | + #[link_name = "llvm.x86.vcvtps2ph.256"] |
| 22 | + fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8; |
| 23 | +} |
| 24 | + |
| 25 | +#[inline] |
| 26 | +#[target_feature(enable = "avx512f")] |
| 27 | +#[cfg_attr(test, assert_instr("vcvtph2ps"))] |
| 28 | +pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { |
| 29 | + transmute(llvm_vcvtph2ps_128(transmute(a))) |
| 30 | +} |
| 31 | + |
| 32 | +#[inline] |
| 33 | +#[target_feature(enable = "avx512f")] |
| 34 | +#[cfg_attr(test, assert_instr("vcvtph2ps"))] |
| 35 | +pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { |
| 36 | + transmute(llvm_vcvtph2ps_256(transmute(a))) |
| 37 | +} |
| 38 | + |
| 39 | +macro_rules! dispatch_rounding { |
| 40 | + ($rounding:ident, $call:ident) => {{ |
| 41 | + const NEAREST: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; |
| 42 | + const DOWN: i32 = _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; |
| 43 | + const UP: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; |
| 44 | + const TRUNCATE: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; |
| 45 | + const MXCSR: i32 = _MM_FROUND_CUR_DIRECTION; |
| 46 | + match $rounding { |
| 47 | + NEAREST => call!(NEAREST), |
| 48 | + DOWN => call!(DOWN), |
| 49 | + UP => call!(UP), |
| 50 | + TRUNCATE => call!(TRUNCATE), |
| 51 | + MXCSR => call!(MXCSR), |
| 52 | + _ => unreachable_unchecked(), |
| 53 | + } |
| 54 | + }}; |
| 55 | +} |
| 56 | + |
| 57 | +#[inline] |
| 58 | +#[target_feature(enable = "avx512f")] |
| 59 | +#[rustc_args_required_const(1)] |
| 60 | +#[cfg_attr(test, assert_instr("vcvtps2ph", rounding = 0))] |
| 61 | +pub unsafe fn _mm_cvtps_ph(a: __m128, rounding: i32) -> __m128i { |
| 62 | + let a = transmute(a); |
| 63 | + macro_rules! call { |
| 64 | + ($rounding:ident) => { |
| 65 | + llvm_vcvtps2ph_128(a, $rounding) |
| 66 | + }; |
| 67 | + } |
| 68 | + transmute(dispatch_rounding!(rounding, call)) |
| 69 | +} |
| 70 | + |
| 71 | +#[inline] |
| 72 | +#[target_feature(enable = "avx512f")] |
| 73 | +#[rustc_args_required_const(1)] |
| 74 | +#[cfg_attr(test, assert_instr("vcvtps2ph", rounding = 0))] |
| 75 | +pub unsafe fn _mm256_cvtps_ph(a: __m256, rounding: i32) -> __m128i { |
| 76 | + let a = transmute(a); |
| 77 | + macro_rules! call { |
| 78 | + ($rounding:ident) => { |
| 79 | + llvm_vcvtps2ph_256(a, $rounding) |
| 80 | + }; |
| 81 | + } |
| 82 | + transmute(dispatch_rounding!(rounding, call)) |
| 83 | +} |
| 84 | + |
| 85 | +#[cfg(test)] |
| 86 | +mod tests { |
| 87 | + use crate::{core_arch::x86::*, mem::transmute}; |
| 88 | + use stdsimd_test::simd_test; |
| 89 | + |
| 90 | + #[simd_test(enable = "avx512f")] |
| 91 | + unsafe fn test_mm_cvtph_ps() { |
| 92 | + let array = [1_f32, 2_f32, 3_f32, 4_f32]; |
| 93 | + let float_vec: __m128 = transmute(array); |
| 94 | + let halfs: __m128i = _mm_cvtps_ph(float_vec, 0); |
| 95 | + let floats: __m128 = _mm_cvtph_ps(halfs); |
| 96 | + let result: [f32; 4] = transmute(floats); |
| 97 | + assert_eq!(result, array); |
| 98 | + } |
| 99 | + |
| 100 | + #[simd_test(enable = "avx512f")] |
| 101 | + unsafe fn test_mm256_cvtph_ps() { |
| 102 | + let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32]; |
| 103 | + let float_vec: __m256 = transmute(array); |
| 104 | + let halfs: __m128i = _mm256_cvtps_ph(float_vec, 0); |
| 105 | + let floats: __m256 = _mm256_cvtph_ps(halfs); |
| 106 | + let result: [f32; 8] = transmute(floats); |
| 107 | + assert_eq!(result, array); |
| 108 | + } |
| 109 | +} |
0 commit comments