Skip to content

Commit 50fb8df

Browse files
committed
use manual implementation in vshrq_n_s8 and vshrq_n_u8
1 parent b8bdeb5 commit 50fb8df

File tree

3 files changed

+79
-45
lines changed

3 files changed

+79
-45
lines changed

crates/core_arch/src/arm/neon/generated.rs

-40
Original file line numberDiff line numberDiff line change
@@ -7990,18 +7990,6 @@ pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
79907990
simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
79917991
}
79927992

7993-
/// Shift right
7994-
#[inline]
7995-
#[target_feature(enable = "neon")]
7996-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
7997-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
7998-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
7999-
#[rustc_legacy_const_generics(1)]
8000-
pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
8001-
static_assert!(N : i32 where N >= 1 && N <= 8);
8002-
simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
8003-
}
8004-
80057993
/// Shift right
80067994
#[inline]
80077995
#[target_feature(enable = "neon")]
@@ -8086,18 +8074,6 @@ pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
80868074
simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
80878075
}
80888076

8089-
/// Shift right
8090-
#[inline]
8091-
#[target_feature(enable = "neon")]
8092-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
8093-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
8094-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
8095-
#[rustc_legacy_const_generics(1)]
8096-
pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
8097-
static_assert!(N : i32 where N >= 1 && N <= 8);
8098-
simd_shr(a, vdupq_n_u8(N.try_into().unwrap()))
8099-
}
8100-
81018077
/// Shift right
81028078
#[inline]
81038079
#[target_feature(enable = "neon")]
@@ -14422,14 +14398,6 @@ mod test {
1442214398
assert_eq!(r, e);
1442314399
}
1442414400

14425-
#[simd_test(enable = "neon")]
14426-
unsafe fn test_vshrq_n_s8() {
14427-
let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
14428-
let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
14429-
let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a)));
14430-
assert_eq!(r, e);
14431-
}
14432-
1443314401
#[simd_test(enable = "neon")]
1443414402
unsafe fn test_vshr_n_s16() {
1443514403
let a: i16x4 = i16x4::new(4, 8, 12, 16);
@@ -14486,14 +14454,6 @@ mod test {
1448614454
assert_eq!(r, e);
1448714455
}
1448814456

14489-
#[simd_test(enable = "neon")]
14490-
unsafe fn test_vshrq_n_u8() {
14491-
let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
14492-
let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
14493-
let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a)));
14494-
assert_eq!(r, e);
14495-
}
14496-
1449714457
#[simd_test(enable = "neon")]
1449814458
unsafe fn test_vshr_n_u16() {
1449914459
let a: u16x4 = u16x4::new(4, 8, 12, 16);

crates/core_arch/src/arm/neon/mod.rs

+75-1
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ use crate::mem::align_of;
1010
use crate::{
1111
core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute,
1212
};
13+
use core::convert::TryInto;
1314
#[cfg(test)]
1415
use stdarch_test::assert_instr;
15-
use core::convert::TryInto;
1616

1717
pub(crate) type p8 = u8;
1818
pub(crate) type p16 = u16;
@@ -4412,6 +4412,64 @@ pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
44124412
vdupq_n_f32(value)
44134413
}
44144414

4415+
/// Signed shift right
4416+
#[inline]
4417+
#[target_feature(enable = "neon")]
4418+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4419+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
4420+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("sshr", N = 2))]
4421+
#[rustc_legacy_const_generics(1)]
4422+
pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
4423+
static_assert!(N : i32 where N >= 1 && N <= 8);
4424+
int8x16_t(
4425+
a.0 >> N,
4426+
a.1 >> N,
4427+
a.2 >> N,
4428+
a.3 >> N,
4429+
a.4 >> N,
4430+
a.5 >> N,
4431+
a.6 >> N,
4432+
a.7 >> N,
4433+
a.8 >> N,
4434+
a.9 >> N,
4435+
a.10 >> N,
4436+
a.11 >> N,
4437+
a.12 >> N,
4438+
a.13 >> N,
4439+
a.14 >> N,
4440+
a.15 >> N,
4441+
)
4442+
}
4443+
4444+
/// Unsigned shift right
4445+
#[inline]
4446+
#[target_feature(enable = "neon")]
4447+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4448+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
4449+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("ushr", N = 2))]
4450+
#[rustc_legacy_const_generics(1)]
4451+
pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
4452+
static_assert!(N : i32 where N >= 1 && N <= 8);
4453+
uint8x16_t(
4454+
a.0 >> N,
4455+
a.1 >> N,
4456+
a.2 >> N,
4457+
a.3 >> N,
4458+
a.4 >> N,
4459+
a.5 >> N,
4460+
a.6 >> N,
4461+
a.7 >> N,
4462+
a.8 >> N,
4463+
a.9 >> N,
4464+
a.10 >> N,
4465+
a.11 >> N,
4466+
a.12 >> N,
4467+
a.13 >> N,
4468+
a.14 >> N,
4469+
a.15 >> N,
4470+
)
4471+
}
4472+
44154473
/// Extract vector from pair of vectors
44164474
#[inline]
44174475
#[target_feature(enable = "neon")]
@@ -5845,6 +5903,22 @@ mod tests {
58455903
assert_eq!(r, 2);
58465904
}
58475905

5906+
#[simd_test(enable = "neon")]
5907+
unsafe fn test_vshrq_n_s8() {
5908+
let a = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
5909+
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5910+
let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a)));
5911+
assert_eq!(r, e);
5912+
}
5913+
5914+
#[simd_test(enable = "neon")]
5915+
unsafe fn test_vshrq_n_u8() {
5916+
let a = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
5917+
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5918+
let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a)));
5919+
assert_eq!(r, e);
5920+
}
5921+
58485922
#[simd_test(enable = "neon")]
58495923
unsafe fn test_vext_s64() {
58505924
let a: i64x1 = i64x1::new(0);

crates/stdarch-gen/neon.spec

+4-4
Original file line numberDiff line numberDiff line change
@@ -1914,15 +1914,15 @@ n-suffix
19141914
constn = N
19151915
multi_fn = static_assert-N-1-bits
19161916
multi_fn = simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()}
1917-
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
1917+
a = 4, 8, 12, 16, 20, 24, 28, 32
19181918
n = 2
1919-
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
1919+
validate 1, 2, 3, 4, 5, 6, 7, 8
19201920

19211921
arm = vshr.s
19221922
aarch64 = sshr
1923-
generate int*_t, int64x*_t
1923+
generate int8x8_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x*_t
19241924
aarch64 = ushr
1925-
generate uint*_t, uint64x*_t
1925+
generate uint8x8_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x*_t
19261926

19271927
/// Shift right narrow
19281928
name = vshrn_n

0 commit comments

Comments
 (0)