Skip to content

Commit 9bf9c83

Browse files
alexcrichton authored and gnzlbg committed
Match clang for _mm512_abs_epi32 intrinsics
This commit updates stdsimd's codegen for the `_mm512_abs_epi32` intrinsic (and its masked versions) to match Clang's, which uses raw SIMD operations rather than any LLVM intrinsic calls. The masked versions are built on top of the new `simd_select_bitmask` intrinsic recently introduced to the compiler!
1 parent 955985b commit 9bf9c83

File tree

3 files changed

+13
-10
lines changed

3 files changed

+13
-10
lines changed

.appveyor.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ environment:
1313
install:
1414
# Install rust, x86_64-pc-windows-msvc host
1515
- appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
16-
- rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly-2018-10-20
16+
- rustup-init.exe -y --default-host x86_64-pc-windows-msvc --default-toolchain nightly
1717
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
1818
- if NOT "%TARGET%" == "x86_64-pc-windows-msvc" rustup target add %TARGET%
1919
- rustc -vV

coresimd/simd_llvm.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ extern "platform-intrinsic" {
4949
pub fn simd_reduce_any<T>(x: T) -> bool;
5050

5151
pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
52+
pub fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
5253

5354
pub fn simd_fmin<T>(a: T, b: T) -> T;
5455
pub fn simd_fmax<T>(a: T, b: T) -> T;

coresimd/x86/avx512f.rs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use coresimd::simd::*;
2+
use coresimd::simd_llvm::*;
23
use coresimd::x86::*;
34
use mem;
45

@@ -12,7 +13,11 @@ use stdsimd_test::assert_instr;
1213
#[target_feature(enable = "avx512f")]
1314
#[cfg_attr(test, assert_instr(vpabsd))]
1415
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
15-
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1))
16+
let a = a.as_i32x16();
17+
let zero: i32x16 = mem::zeroed();
18+
let sub = simd_sub(zero, a);
19+
let cmp: i32x16 = simd_gt(a, zero);
20+
mem::transmute(simd_select(cmp, a, sub))
1621
}
1722

1823
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
@@ -24,7 +29,8 @@ pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
2429
#[target_feature(enable = "avx512f")]
2530
#[cfg_attr(test, assert_instr(vpabsd))]
2631
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
27-
mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k))
32+
let abs = _mm512_abs_epi32(a).as_i32x16();
33+
mem::transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
2834
}
2935

3036
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
@@ -36,7 +42,9 @@ pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _
3642
#[target_feature(enable = "avx512f")]
3743
#[cfg_attr(test, assert_instr(vpabsd))]
3844
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
39-
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k))
45+
let abs = _mm512_abs_epi32(a).as_i32x16();
46+
let zero = _mm512_setzero_si512().as_i32x16();
47+
mem::transmute(simd_select_bitmask(k, abs, zero))
4048
}
4149

4250
/// Return vector of type `__m512i` with all elements set to zero.
@@ -77,12 +85,6 @@ pub unsafe fn _mm512_setr_epi32(
7785
mem::transmute(r)
7886
}
7987

80-
#[allow(improper_ctypes)]
81-
extern "C" {
82-
#[link_name = "llvm.x86.avx512.mask.pabs.d.512"]
83-
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
84-
}
85-
8688
#[cfg(test)]
8789
mod tests {
8890
use std;

0 commit comments

Comments
 (0)