Skip to content

Commit 4e4151e

Browse files
eduardosm authored and Amanieu committed
Implement _mm(256)_abs_epi* without x86-specific LLVM intrinsics
1 parent 9814fb0 commit 4e4151e

File tree

3 files changed

+91
-21
lines changed

3 files changed

+91
-21
lines changed

crates/core_arch/src/simd.rs

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,73 @@ simd_ty!(
371371
x7
372372
);
373373

374+
simd_m_ty!(
375+
m8x32[i8]:
376+
x0,
377+
x1,
378+
x2,
379+
x3,
380+
x4,
381+
x5,
382+
x6,
383+
x7,
384+
x8,
385+
x9,
386+
x10,
387+
x11,
388+
x12,
389+
x13,
390+
x14,
391+
x15,
392+
x16,
393+
x17,
394+
x18,
395+
x19,
396+
x20,
397+
x21,
398+
x22,
399+
x23,
400+
x24,
401+
x25,
402+
x26,
403+
x27,
404+
x28,
405+
x29,
406+
x30,
407+
x31
408+
);
409+
simd_m_ty!(
410+
m16x16[i16]:
411+
x0,
412+
x1,
413+
x2,
414+
x3,
415+
x4,
416+
x5,
417+
x6,
418+
x7,
419+
x8,
420+
x9,
421+
x10,
422+
x11,
423+
x12,
424+
x13,
425+
x14,
426+
x15
427+
);
428+
simd_m_ty!(
429+
m32x8[i32]:
430+
x0,
431+
x1,
432+
x2,
433+
x3,
434+
x4,
435+
x5,
436+
x6,
437+
x7
438+
);
439+
440+
374441
// 512-bit wide types:
375442

376443
simd_ty!(

crates/core_arch/src/x86/avx2.rs

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,10 @@ use stdarch_test::assert_instr;
3232
#[cfg_attr(test, assert_instr(vpabsd))]
3333
#[stable(feature = "simd_x86", since = "1.27.0")]
3434
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
35-
transmute(pabsd(a.as_i32x8()))
35+
let a = a.as_i32x8();
36+
let zero = i32x8::splat(0);
37+
let r = simd_select::<m32x8, _>(simd_lt(a, zero), simd_neg(a), a);
38+
transmute(r)
3639
}
3740

3841
/// Computes the absolute values of packed 16-bit integers in `a`.
@@ -43,7 +46,10 @@ pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
4346
#[cfg_attr(test, assert_instr(vpabsw))]
4447
#[stable(feature = "simd_x86", since = "1.27.0")]
4548
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
46-
transmute(pabsw(a.as_i16x16()))
49+
let a = a.as_i16x16();
50+
let zero = i16x16::splat(0);
51+
let r = simd_select::<m16x16, _>(simd_lt(a, zero), simd_neg(a), a);
52+
transmute(r)
4753
}
4854

4955
/// Computes the absolute values of packed 8-bit integers in `a`.
@@ -54,7 +60,10 @@ pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
5460
#[cfg_attr(test, assert_instr(vpabsb))]
5561
#[stable(feature = "simd_x86", since = "1.27.0")]
5662
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
57-
transmute(pabsb(a.as_i8x32()))
63+
let a = a.as_i8x32();
64+
let zero = i8x32::splat(0);
65+
let r = simd_select::<m8x32, _>(simd_lt(a, zero), simd_neg(a), a);
66+
transmute(r)
5867
}
5968

6069
/// Adds packed 64-bit integers in `a` and `b`.
@@ -3639,12 +3648,6 @@ pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
36393648

36403649
#[allow(improper_ctypes)]
36413650
extern "C" {
3642-
#[link_name = "llvm.x86.avx2.pabs.b"]
3643-
fn pabsb(a: i8x32) -> u8x32;
3644-
#[link_name = "llvm.x86.avx2.pabs.w"]
3645-
fn pabsw(a: i16x16) -> u16x16;
3646-
#[link_name = "llvm.x86.avx2.pabs.d"]
3647-
fn pabsd(a: i32x8) -> u32x8;
36483651
#[link_name = "llvm.x86.avx2.phadd.w"]
36493652
fn phaddw(a: i16x16, b: i16x16) -> i16x16;
36503653
#[link_name = "llvm.x86.avx2.phadd.d"]

crates/core_arch/src/x86/ssse3.rs

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,10 @@ use stdarch_test::assert_instr;
1717
#[cfg_attr(test, assert_instr(pabsb))]
1818
#[stable(feature = "simd_x86", since = "1.27.0")]
1919
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
20-
transmute(pabsb128(a.as_i8x16()))
20+
let a = a.as_i8x16();
21+
let zero = i8x16::splat(0);
22+
let r = simd_select::<m8x16, _>(simd_lt(a, zero), simd_neg(a), a);
23+
transmute(r)
2124
}
2225

2326
/// Computes the absolute value of each of the packed 16-bit signed integers in
@@ -30,7 +33,10 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
3033
#[cfg_attr(test, assert_instr(pabsw))]
3134
#[stable(feature = "simd_x86", since = "1.27.0")]
3235
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
33-
transmute(pabsw128(a.as_i16x8()))
36+
let a = a.as_i16x8();
37+
let zero = i16x8::splat(0);
38+
let r = simd_select::<m16x8, _>(simd_lt(a, zero), simd_neg(a), a);
39+
transmute(r)
3440
}
3541

3642
/// Computes the absolute value of each of the packed 32-bit signed integers in
@@ -43,7 +49,10 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
4349
#[cfg_attr(test, assert_instr(pabsd))]
4450
#[stable(feature = "simd_x86", since = "1.27.0")]
4551
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
46-
transmute(pabsd128(a.as_i32x4()))
52+
let a = a.as_i32x4();
53+
let zero = i32x4::splat(0);
54+
let r = simd_select::<m32x4, _>(simd_lt(a, zero), simd_neg(a), a);
55+
transmute(r)
4756
}
4857

4958
/// Shuffles bytes from `a` according to the content of `b`.
@@ -285,15 +294,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
285294

286295
#[allow(improper_ctypes)]
287296
extern "C" {
288-
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
289-
fn pabsb128(a: i8x16) -> u8x16;
290-
291-
#[link_name = "llvm.x86.ssse3.pabs.w.128"]
292-
fn pabsw128(a: i16x8) -> u16x8;
293-
294-
#[link_name = "llvm.x86.ssse3.pabs.d.128"]
295-
fn pabsd128(a: i32x4) -> u32x4;
296-
297297
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
298298
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
299299

0 commit comments

Comments
 (0)