Skip to content

Commit b0d7df4

Browse files
committed
use manual implementation in vshrq_n_s8 and vshrq_n_u8
1 parent f5e809e commit b0d7df4

File tree

3 files changed

+79
-45
lines changed

3 files changed

+79
-45
lines changed

crates/core_arch/src/arm/neon/generated.rs

-40
Original file line numberDiff line numberDiff line change
@@ -8122,18 +8122,6 @@ pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
81228122
simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
81238123
}
81248124

8125-
/// Shift right
8126-
#[inline]
8127-
#[target_feature(enable = "neon")]
8128-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
8129-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
8130-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
8131-
#[rustc_legacy_const_generics(1)]
8132-
pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
8133-
static_assert!(N : i32 where N >= 1 && N <= 8);
8134-
simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
8135-
}
8136-
81378125
/// Shift right
81388126
#[inline]
81398127
#[target_feature(enable = "neon")]
@@ -8218,18 +8206,6 @@ pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
82188206
simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
82198207
}
82208208

8221-
/// Shift right
8222-
#[inline]
8223-
#[target_feature(enable = "neon")]
8224-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
8225-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
8226-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
8227-
#[rustc_legacy_const_generics(1)]
8228-
pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
8229-
static_assert!(N : i32 where N >= 1 && N <= 8);
8230-
simd_shr(a, vdupq_n_u8(N.try_into().unwrap()))
8231-
}
8232-
82338209
/// Shift right
82348210
#[inline]
82358211
#[target_feature(enable = "neon")]
@@ -14662,14 +14638,6 @@ mod test {
1466214638
assert_eq!(r, e);
1466314639
}
1466414640

14665-
#[simd_test(enable = "neon")]
14666-
unsafe fn test_vshrq_n_s8() {
14667-
let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
14668-
let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
14669-
let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a)));
14670-
assert_eq!(r, e);
14671-
}
14672-
1467314641
#[simd_test(enable = "neon")]
1467414642
unsafe fn test_vshr_n_s16() {
1467514643
let a: i16x4 = i16x4::new(4, 8, 12, 16);
@@ -14726,14 +14694,6 @@ mod test {
1472614694
assert_eq!(r, e);
1472714695
}
1472814696

14729-
#[simd_test(enable = "neon")]
14730-
unsafe fn test_vshrq_n_u8() {
14731-
let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
14732-
let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
14733-
let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a)));
14734-
assert_eq!(r, e);
14735-
}
14736-
1473714697
#[simd_test(enable = "neon")]
1473814698
unsafe fn test_vshr_n_u16() {
1473914699
let a: u16x4 = u16x4::new(4, 8, 12, 16);

crates/core_arch/src/arm/neon/mod.rs

+75-1
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ use crate::mem::align_of;
1010
use crate::{
1111
core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute,
1212
};
13+
use core::convert::TryInto;
1314
#[cfg(test)]
1415
use stdarch_test::assert_instr;
15-
use core::convert::TryInto;
1616

1717
pub(crate) type p8 = u8;
1818
pub(crate) type p16 = u16;
@@ -4412,6 +4412,64 @@ pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
44124412
vdupq_n_f32(value)
44134413
}
44144414

4415+
/// Signed shift right
4416+
#[inline]
4417+
#[target_feature(enable = "neon")]
4418+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4419+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
4420+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("sshr", N = 2))]
4421+
#[rustc_legacy_const_generics(1)]
4422+
pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
4423+
static_assert!(N : i32 where N >= 1 && N <= 8);
4424+
int8x16_t(
4425+
a.0 >> N,
4426+
a.1 >> N,
4427+
a.2 >> N,
4428+
a.3 >> N,
4429+
a.4 >> N,
4430+
a.5 >> N,
4431+
a.6 >> N,
4432+
a.7 >> N,
4433+
a.8 >> N,
4434+
a.9 >> N,
4435+
a.10 >> N,
4436+
a.11 >> N,
4437+
a.12 >> N,
4438+
a.13 >> N,
4439+
a.14 >> N,
4440+
a.15 >> N,
4441+
)
4442+
}
4443+
4444+
/// Unsigned shift right
4445+
#[inline]
4446+
#[target_feature(enable = "neon")]
4447+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4448+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
4449+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("ushr", N = 2))]
4450+
#[rustc_legacy_const_generics(1)]
4451+
pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
4452+
static_assert!(N : i32 where N >= 1 && N <= 8);
4453+
uint8x16_t(
4454+
a.0 >> N,
4455+
a.1 >> N,
4456+
a.2 >> N,
4457+
a.3 >> N,
4458+
a.4 >> N,
4459+
a.5 >> N,
4460+
a.6 >> N,
4461+
a.7 >> N,
4462+
a.8 >> N,
4463+
a.9 >> N,
4464+
a.10 >> N,
4465+
a.11 >> N,
4466+
a.12 >> N,
4467+
a.13 >> N,
4468+
a.14 >> N,
4469+
a.15 >> N,
4470+
)
4471+
}
4472+
44154473
/// Extract vector from pair of vectors
44164474
#[inline]
44174475
#[target_feature(enable = "neon")]
@@ -5845,6 +5903,22 @@ mod tests {
58455903
assert_eq!(r, 2);
58465904
}
58475905

5906+
#[simd_test(enable = "neon")]
5907+
unsafe fn test_vshrq_n_s8() {
5908+
let a = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
5909+
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5910+
let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a)));
5911+
assert_eq!(r, e);
5912+
}
5913+
5914+
#[simd_test(enable = "neon")]
5915+
unsafe fn test_vshrq_n_u8() {
5916+
let a = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
5917+
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5918+
let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a)));
5919+
assert_eq!(r, e);
5920+
}
5921+
58485922
#[simd_test(enable = "neon")]
58495923
unsafe fn test_vext_s64() {
58505924
let a: i64x1 = i64x1::new(0);

crates/stdarch-gen/neon.spec

+4-4
Original file line numberDiff line numberDiff line change
@@ -2136,15 +2136,15 @@ n-suffix
21362136
constn = N
21372137
multi_fn = static_assert-N-1-bits
21382138
multi_fn = simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()}
2139-
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
2139+
a = 4, 8, 12, 16, 20, 24, 28, 32
21402140
n = 2
2141-
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
2141+
validate 1, 2, 3, 4, 5, 6, 7, 8
21422142

21432143
arm = vshr.s
21442144
aarch64 = sshr
2145-
generate int*_t, int64x*_t
2145+
generate int8x8_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x*_t
21462146
aarch64 = ushr
2147-
generate uint*_t, uint64x*_t
2147+
generate uint8x8_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x*_t
21482148

21492149
/// Shift right narrow
21502150
name = vshrn_n

0 commit comments

Comments
 (0)