Skip to content

Commit f3b693a

Browse files
committed
use simd_shr for vshrq_n_s8 and vshrq_n_u8
1 parent b0d7df4 commit f3b693a

File tree

3 files changed

+42
-76
lines changed

3 files changed

+42
-76
lines changed

crates/core_arch/src/arm/neon/generated.rs

+40
Original file line numberDiff line numberDiff line change
@@ -8122,6 +8122,18 @@ pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
81228122
simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
81238123
}
81248124

8125+
/// Shift right
8126+
#[inline]
8127+
#[target_feature(enable = "neon")]
8128+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
8129+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
8130+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
8131+
#[rustc_legacy_const_generics(1)]
8132+
pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
8133+
static_assert!(N : i32 where N >= 1 && N <= 8);
8134+
simd_shr(a, vdupq_n_s8((if N == 8 { 7 } else { N }).try_into().unwrap()))
8135+
}
8136+
81258137
/// Shift right
81268138
#[inline]
81278139
#[target_feature(enable = "neon")]
@@ -8206,6 +8218,18 @@ pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
82068218
simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
82078219
}
82088220

8221+
/// Shift right
8222+
#[inline]
8223+
#[target_feature(enable = "neon")]
8224+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
8225+
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
8226+
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
8227+
#[rustc_legacy_const_generics(1)]
8228+
pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
8229+
static_assert!(N : i32 where N >= 1 && N <= 8);
8230+
if N == 8 { vdupq_n_u8(0) } else { simd_shr(a, vdupq_n_u8(N.try_into().unwrap())) }
8231+
}
8232+
82098233
/// Shift right
82108234
#[inline]
82118235
#[target_feature(enable = "neon")]
@@ -14638,6 +14662,14 @@ mod test {
1463814662
assert_eq!(r, e);
1463914663
}
1464014664

14665+
#[simd_test(enable = "neon")]
14666+
unsafe fn test_vshrq_n_s8() {
14667+
let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
14668+
let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
14669+
let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a)));
14670+
assert_eq!(r, e);
14671+
}
14672+
1464114673
#[simd_test(enable = "neon")]
1464214674
unsafe fn test_vshr_n_s16() {
1464314675
let a: i16x4 = i16x4::new(4, 8, 12, 16);
@@ -14694,6 +14726,14 @@ mod test {
1469414726
assert_eq!(r, e);
1469514727
}
1469614728

14729+
#[simd_test(enable = "neon")]
14730+
unsafe fn test_vshrq_n_u8() {
14731+
let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
14732+
let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
14733+
let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a)));
14734+
assert_eq!(r, e);
14735+
}
14736+
1469714737
#[simd_test(enable = "neon")]
1469814738
unsafe fn test_vshr_n_u16() {
1469914739
let a: u16x4 = u16x4::new(4, 8, 12, 16);

crates/core_arch/src/arm/neon/mod.rs

-74
Original file line numberDiff line numberDiff line change
@@ -4412,64 +4412,6 @@ pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
44124412
vdupq_n_f32(value)
44134413
}
44144414

4415-
/// Signed shift right
4416-
#[inline]
4417-
#[target_feature(enable = "neon")]
4418-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4419-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
4420-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("sshr", N = 2))]
4421-
#[rustc_legacy_const_generics(1)]
4422-
pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
4423-
static_assert!(N : i32 where N >= 1 && N <= 8);
4424-
int8x16_t(
4425-
a.0 >> N,
4426-
a.1 >> N,
4427-
a.2 >> N,
4428-
a.3 >> N,
4429-
a.4 >> N,
4430-
a.5 >> N,
4431-
a.6 >> N,
4432-
a.7 >> N,
4433-
a.8 >> N,
4434-
a.9 >> N,
4435-
a.10 >> N,
4436-
a.11 >> N,
4437-
a.12 >> N,
4438-
a.13 >> N,
4439-
a.14 >> N,
4440-
a.15 >> N,
4441-
)
4442-
}
4443-
4444-
/// Unsigned shift right
4445-
#[inline]
4446-
#[target_feature(enable = "neon")]
4447-
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
4448-
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
4449-
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("ushr", N = 2))]
4450-
#[rustc_legacy_const_generics(1)]
4451-
pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
4452-
static_assert!(N : i32 where N >= 1 && N <= 8);
4453-
uint8x16_t(
4454-
a.0 >> N,
4455-
a.1 >> N,
4456-
a.2 >> N,
4457-
a.3 >> N,
4458-
a.4 >> N,
4459-
a.5 >> N,
4460-
a.6 >> N,
4461-
a.7 >> N,
4462-
a.8 >> N,
4463-
a.9 >> N,
4464-
a.10 >> N,
4465-
a.11 >> N,
4466-
a.12 >> N,
4467-
a.13 >> N,
4468-
a.14 >> N,
4469-
a.15 >> N,
4470-
)
4471-
}
4472-
44734415
/// Extract vector from pair of vectors
44744416
#[inline]
44754417
#[target_feature(enable = "neon")]
@@ -5903,22 +5845,6 @@ mod tests {
59035845
assert_eq!(r, 2);
59045846
}
59055847

5906-
#[simd_test(enable = "neon")]
5907-
unsafe fn test_vshrq_n_s8() {
5908-
let a = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
5909-
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5910-
let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a)));
5911-
assert_eq!(r, e);
5912-
}
5913-
5914-
#[simd_test(enable = "neon")]
5915-
unsafe fn test_vshrq_n_u8() {
5916-
let a = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64);
5917-
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
5918-
let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a)));
5919-
assert_eq!(r, e);
5920-
}
5921-
59225848
#[simd_test(enable = "neon")]
59235849
unsafe fn test_vext_s64() {
59245850
let a: i64x1 = i64x1::new(0);

crates/stdarch-gen/neon.spec

+2-2
Original file line numberDiff line numberDiff line change
@@ -2142,9 +2142,9 @@ validate 1, 2, 3, 4, 5, 6, 7, 8
21422142

21432143
arm = vshr.s
21442144
aarch64 = sshr
2145-
generate int8x8_t, int16x4_t, int16x8_t, int32x2_t, int32x4_t, int64x*_t
2145+
generate int*_t, int64x*_t
21462146
aarch64 = ushr
2147-
generate uint8x8_t, uint16x4_t, uint16x8_t, uint32x2_t, uint32x4_t, uint64x*_t
2147+
generate uint*_t, uint64x*_t
21482148

21492149
/// Shift right narrow
21502150
name = vshrn_n

0 commit comments

Comments
 (0)