Skip to content

Commit d23e2a4

Browse files
authored
Add vrshl, vrshr, vrshrn, vrsra, vsra neon instructions (rust-lang#1127)
1 parent 796bfdf commit d23e2a4

File tree

5 files changed

+6278
-4073
lines changed

5 files changed

+6278
-4073
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4605,6 +4605,140 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
46054605
transmute(a)
46064606
}
46074607

4608+
/// Signed rounding shift left
4609+
#[inline]
4610+
#[target_feature(enable = "neon")]
4611+
#[cfg_attr(test, assert_instr(srshl))]
4612+
pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
4613+
transmute(vrshl_s64(transmute(a), transmute(b)))
4614+
}
4615+
4616+
/// Unsigned rounding shift left
4617+
#[inline]
4618+
#[target_feature(enable = "neon")]
4619+
#[cfg_attr(test, assert_instr(urshl))]
4620+
pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
4621+
transmute(vrshl_u64(transmute(a), transmute(b)))
4622+
}
4623+
4624+
/// Signed rounding shift right
4625+
#[inline]
4626+
#[target_feature(enable = "neon")]
4627+
#[cfg_attr(test, assert_instr(srshr, N = 2))]
4628+
#[rustc_legacy_const_generics(1)]
4629+
pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
4630+
static_assert!(N : i32 where N >= 1 && N <= 64);
4631+
vrshld_s64(a, -N as i64)
4632+
}
4633+
4634+
/// Unsigned rounding shift right
4635+
#[inline]
4636+
#[target_feature(enable = "neon")]
4637+
#[cfg_attr(test, assert_instr(urshr, N = 2))]
4638+
#[rustc_legacy_const_generics(1)]
4639+
pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
4640+
static_assert!(N : i32 where N >= 1 && N <= 64);
4641+
vrshld_u64(a, -N as i64)
4642+
}
4643+
4644+
/// Rounding shift right narrow
4645+
#[inline]
4646+
#[target_feature(enable = "neon")]
4647+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4648+
#[rustc_legacy_const_generics(2)]
4649+
pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
4650+
static_assert!(N : i32 where N >= 1 && N <= 8);
4651+
simd_shuffle16(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
4652+
}
4653+
4654+
/// Rounding shift right narrow
4655+
#[inline]
4656+
#[target_feature(enable = "neon")]
4657+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4658+
#[rustc_legacy_const_generics(2)]
4659+
pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
4660+
static_assert!(N : i32 where N >= 1 && N <= 16);
4661+
simd_shuffle8(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
4662+
}
4663+
4664+
/// Rounding shift right narrow
4665+
#[inline]
4666+
#[target_feature(enable = "neon")]
4667+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4668+
#[rustc_legacy_const_generics(2)]
4669+
pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
4670+
static_assert!(N : i32 where N >= 1 && N <= 32);
4671+
simd_shuffle4(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
4672+
}
4673+
4674+
/// Rounding shift right narrow
4675+
#[inline]
4676+
#[target_feature(enable = "neon")]
4677+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4678+
#[rustc_legacy_const_generics(2)]
4679+
pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
4680+
static_assert!(N : i32 where N >= 1 && N <= 8);
4681+
simd_shuffle16(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
4682+
}
4683+
4684+
/// Rounding shift right narrow
4685+
#[inline]
4686+
#[target_feature(enable = "neon")]
4687+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4688+
#[rustc_legacy_const_generics(2)]
4689+
pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
4690+
static_assert!(N : i32 where N >= 1 && N <= 16);
4691+
simd_shuffle8(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
4692+
}
4693+
4694+
/// Rounding shift right narrow
4695+
#[inline]
4696+
#[target_feature(enable = "neon")]
4697+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4698+
#[rustc_legacy_const_generics(2)]
4699+
pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
4700+
static_assert!(N : i32 where N >= 1 && N <= 32);
4701+
simd_shuffle4(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
4702+
}
4703+
4704+
/// Signed rounding shift right and accumulate.
4705+
#[inline]
4706+
#[target_feature(enable = "neon")]
4707+
#[cfg_attr(test, assert_instr(nop, N = 2))]
4708+
#[rustc_legacy_const_generics(2)]
4709+
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
4710+
static_assert!(N : i32 where N >= 1 && N <= 64);
4711+
let b: int64x1_t = vrshr_n_s64::<N>(transmute(b));
4712+
transmute(simd_add(transmute(a), b))
4713+
}
4714+
4715+
/// Unsigned rounding shift right and accumulate.
4716+
#[inline]
4717+
#[target_feature(enable = "neon")]
4718+
#[cfg_attr(test, assert_instr(nop, N = 2))]
4719+
#[rustc_legacy_const_generics(2)]
4720+
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
4721+
static_assert!(N : i32 where N >= 1 && N <= 64);
4722+
let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b));
4723+
transmute(simd_add(transmute(a), b))
4724+
}
4725+
4726+
/// Signed Shift left
4727+
#[inline]
4728+
#[target_feature(enable = "neon")]
4729+
#[cfg_attr(test, assert_instr(sshl))]
4730+
pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
4731+
transmute(vshl_s64(transmute(a), transmute(b)))
4732+
}
4733+
4734+
/// Unsigned Shift left
4735+
#[inline]
4736+
#[target_feature(enable = "neon")]
4737+
#[cfg_attr(test, assert_instr(ushl))]
4738+
pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
4739+
transmute(vshl_u64(transmute(a), transmute(b)))
4740+
}
4741+
46084742
/// Signed shift left long
46094743
#[inline]
46104744
#[target_feature(enable = "neon")]
@@ -9872,6 +10006,130 @@ mod test {
987210006
assert_eq!(r, e);
987310007
}
987410008

10009+
#[simd_test(enable = "neon")]
10010+
unsafe fn test_vrshld_s64() {
10011+
let a: i64 = 1;
10012+
let b: i64 = 2;
10013+
let e: i64 = 4;
10014+
let r: i64 = transmute(vrshld_s64(transmute(a), transmute(b)));
10015+
assert_eq!(r, e);
10016+
}
10017+
10018+
#[simd_test(enable = "neon")]
10019+
unsafe fn test_vrshld_u64() {
10020+
let a: u64 = 1;
10021+
let b: i64 = 2;
10022+
let e: u64 = 4;
10023+
let r: u64 = transmute(vrshld_u64(transmute(a), transmute(b)));
10024+
assert_eq!(r, e);
10025+
}
10026+
10027+
#[simd_test(enable = "neon")]
10028+
unsafe fn test_vrshrd_n_s64() {
10029+
let a: i64 = 4;
10030+
let e: i64 = 1;
10031+
let r: i64 = transmute(vrshrd_n_s64::<2>(transmute(a)));
10032+
assert_eq!(r, e);
10033+
}
10034+
10035+
#[simd_test(enable = "neon")]
10036+
unsafe fn test_vrshrd_n_u64() {
10037+
let a: u64 = 4;
10038+
let e: u64 = 1;
10039+
let r: u64 = transmute(vrshrd_n_u64::<2>(transmute(a)));
10040+
assert_eq!(r, e);
10041+
}
10042+
10043+
#[simd_test(enable = "neon")]
10044+
unsafe fn test_vrshrn_high_n_s16() {
10045+
let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10046+
let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
10047+
let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
10048+
let r: i8x16 = transmute(vrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
10049+
assert_eq!(r, e);
10050+
}
10051+
10052+
#[simd_test(enable = "neon")]
10053+
unsafe fn test_vrshrn_high_n_s32() {
10054+
let a: i16x4 = i16x4::new(0, 1, 8, 9);
10055+
let b: i32x4 = i32x4::new(32, 36, 40, 44);
10056+
let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10057+
let r: i16x8 = transmute(vrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
10058+
assert_eq!(r, e);
10059+
}
10060+
10061+
#[simd_test(enable = "neon")]
10062+
unsafe fn test_vrshrn_high_n_s64() {
10063+
let a: i32x2 = i32x2::new(0, 1);
10064+
let b: i64x2 = i64x2::new(32, 36);
10065+
let e: i32x4 = i32x4::new(0, 1, 8, 9);
10066+
let r: i32x4 = transmute(vrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
10067+
assert_eq!(r, e);
10068+
}
10069+
10070+
#[simd_test(enable = "neon")]
10071+
unsafe fn test_vrshrn_high_n_u16() {
10072+
let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10073+
let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
10074+
let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
10075+
let r: u8x16 = transmute(vrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
10076+
assert_eq!(r, e);
10077+
}
10078+
10079+
#[simd_test(enable = "neon")]
10080+
unsafe fn test_vrshrn_high_n_u32() {
10081+
let a: u16x4 = u16x4::new(0, 1, 8, 9);
10082+
let b: u32x4 = u32x4::new(32, 36, 40, 44);
10083+
let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10084+
let r: u16x8 = transmute(vrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
10085+
assert_eq!(r, e);
10086+
}
10087+
10088+
#[simd_test(enable = "neon")]
10089+
unsafe fn test_vrshrn_high_n_u64() {
10090+
let a: u32x2 = u32x2::new(0, 1);
10091+
let b: u64x2 = u64x2::new(32, 36);
10092+
let e: u32x4 = u32x4::new(0, 1, 8, 9);
10093+
let r: u32x4 = transmute(vrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
10094+
assert_eq!(r, e);
10095+
}
10096+
10097+
#[simd_test(enable = "neon")]
10098+
unsafe fn test_vrsrad_n_s64() {
10099+
let a: i64 = 1;
10100+
let b: i64 = 4;
10101+
let e: i64 = 2;
10102+
let r: i64 = transmute(vrsrad_n_s64::<2>(transmute(a), transmute(b)));
10103+
assert_eq!(r, e);
10104+
}
10105+
10106+
#[simd_test(enable = "neon")]
10107+
unsafe fn test_vrsrad_n_u64() {
10108+
let a: u64 = 1;
10109+
let b: u64 = 4;
10110+
let e: u64 = 2;
10111+
let r: u64 = transmute(vrsrad_n_u64::<2>(transmute(a), transmute(b)));
10112+
assert_eq!(r, e);
10113+
}
10114+
10115+
#[simd_test(enable = "neon")]
10116+
unsafe fn test_vshld_s64() {
10117+
let a: i64 = 1;
10118+
let b: i64 = 2;
10119+
let e: i64 = 4;
10120+
let r: i64 = transmute(vshld_s64(transmute(a), transmute(b)));
10121+
assert_eq!(r, e);
10122+
}
10123+
10124+
#[simd_test(enable = "neon")]
10125+
unsafe fn test_vshld_u64() {
10126+
let a: u64 = 1;
10127+
let b: i64 = 2;
10128+
let e: u64 = 4;
10129+
let r: u64 = transmute(vshld_u64(transmute(a), transmute(b)));
10130+
assert_eq!(r, e);
10131+
}
10132+
987510133
#[simd_test(enable = "neon")]
987610134
unsafe fn test_vshll_high_n_s8() {
987710135
let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8);

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2447,6 +2447,66 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol
24472447
))
24482448
}
24492449

2450+
/// Shift left
2451+
#[inline]
2452+
#[target_feature(enable = "neon")]
2453+
#[cfg_attr(test, assert_instr(nop, N = 2))]
2454+
#[rustc_legacy_const_generics(1)]
2455+
pub unsafe fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
2456+
static_assert_imm6!(N);
2457+
a << N
2458+
}
2459+
2460+
/// Shift left
2461+
#[inline]
2462+
#[target_feature(enable = "neon")]
2463+
#[cfg_attr(test, assert_instr(nop, N = 2))]
2464+
#[rustc_legacy_const_generics(1)]
2465+
pub unsafe fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
2466+
static_assert_imm6!(N);
2467+
a << N
2468+
}
2469+
2470+
/// Signed shift right
2471+
#[inline]
2472+
#[target_feature(enable = "neon")]
2473+
#[cfg_attr(test, assert_instr(nop, N = 2))]
2474+
#[rustc_legacy_const_generics(1)]
2475+
pub unsafe fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
2476+
static_assert!(N : i32 where N >= 1 && N <= 64);
2477+
a >> N
2478+
}
2479+
2480+
/// Unsigned shift right
2481+
#[inline]
2482+
#[target_feature(enable = "neon")]
2483+
#[cfg_attr(test, assert_instr(nop, N = 2))]
2484+
#[rustc_legacy_const_generics(1)]
2485+
pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
2486+
static_assert!(N : i32 where N >= 1 && N <= 64);
2487+
a >> N
2488+
}
2489+
2490+
/// Signed shift right and accumulate
2491+
#[inline]
2492+
#[target_feature(enable = "neon")]
2493+
#[cfg_attr(test, assert_instr(nop, N = 2))]
2494+
#[rustc_legacy_const_generics(2)]
2495+
pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
2496+
static_assert!(N : i32 where N >= 1 && N <= 64);
2497+
a + (b >> N)
2498+
}
2499+
2500+
/// Unsigned shift right and accumulate
2501+
#[inline]
2502+
#[target_feature(enable = "neon")]
2503+
#[cfg_attr(test, assert_instr(nop, N = 2))]
2504+
#[rustc_legacy_const_generics(2)]
2505+
pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
2506+
static_assert!(N : i32 where N >= 1 && N <= 64);
2507+
a + (b >> N)
2508+
}
2509+
24502510
/// Shift Left and Insert (immediate)
24512511
#[inline]
24522512
#[target_feature(enable = "neon")]
@@ -3512,6 +3572,56 @@ mod tests {
35123572
assert_eq!(r, e);
35133573
}
35143574

3575+
#[simd_test(enable = "neon")]
3576+
unsafe fn test_vshld_n_s64() {
3577+
let a: i64 = 1;
3578+
let e: i64 = 4;
3579+
let r: i64 = vshld_n_s64::<2>(a);
3580+
assert_eq!(r, e);
3581+
}
3582+
3583+
#[simd_test(enable = "neon")]
3584+
unsafe fn test_vshld_n_u64() {
3585+
let a: u64 = 1;
3586+
let e: u64 = 4;
3587+
let r: u64 = vshld_n_u64::<2>(a);
3588+
assert_eq!(r, e);
3589+
}
3590+
3591+
#[simd_test(enable = "neon")]
3592+
unsafe fn test_vshrd_n_s64() {
3593+
let a: i64 = 4;
3594+
let e: i64 = 1;
3595+
let r: i64 = vshrd_n_s64::<2>(a);
3596+
assert_eq!(r, e);
3597+
}
3598+
3599+
#[simd_test(enable = "neon")]
3600+
unsafe fn test_vshrd_n_u64() {
3601+
let a: u64 = 4;
3602+
let e: u64 = 1;
3603+
let r: u64 = vshrd_n_u64::<2>(a);
3604+
assert_eq!(r, e);
3605+
}
3606+
3607+
#[simd_test(enable = "neon")]
3608+
unsafe fn test_vsrad_n_s64() {
3609+
let a: i64 = 1;
3610+
let b: i64 = 4;
3611+
let e: i64 = 2;
3612+
let r: i64 = vsrad_n_s64::<2>(a, b);
3613+
assert_eq!(r, e);
3614+
}
3615+
3616+
#[simd_test(enable = "neon")]
3617+
unsafe fn test_vsrad_n_u64() {
3618+
let a: u64 = 1;
3619+
let b: u64 = 4;
3620+
let e: u64 = 2;
3621+
let r: u64 = vsrad_n_u64::<2>(a, b);
3622+
assert_eq!(r, e);
3623+
}
3624+
35153625
macro_rules! test_vcombine {
35163626
($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
35173627
#[allow(unused_assignments)]

0 commit comments

Comments
 (0)