Skip to content

Commit 325d722

Browse files
authored
add neon instruction vaddlv_* (rust-lang#1129)
1 parent d23e2a4 commit 325d722

File tree

3 files changed

+307
-0
lines changed

3 files changed

+307
-0
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2876,6 +2876,110 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
28762876
simd_sub(a, b)
28772877
}
28782878

2879+
/// Signed Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.saddlv.i32.v4i16`
/// and returns that intrinsic's `i32` result unchanged.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2880+
#[inline]
2881+
#[target_feature(enable = "neon")]
2882+
#[cfg_attr(test, assert_instr(saddlv))]
2883+
pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 {
2884+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2885+
extern "C" {
2886+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")]
2887+
fn vaddlv_s16_(a: int16x4_t) -> i32;
2888+
}
2889+
vaddlv_s16_(a)
2890+
}
2891+
2892+
/// Signed Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.saddlv.i32.v8i16`
/// and returns that intrinsic's `i32` result unchanged.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2893+
#[inline]
2894+
#[target_feature(enable = "neon")]
2895+
#[cfg_attr(test, assert_instr(saddlv))]
2896+
pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 {
2897+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2898+
extern "C" {
2899+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")]
2900+
fn vaddlvq_s16_(a: int16x8_t) -> i32;
2901+
}
2902+
vaddlvq_s16_(a)
2903+
}
2904+
2905+
/// Signed Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.saddlv.i64.v2i32`
/// and returns that intrinsic's `i64` result unchanged.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2906+
#[inline]
2907+
#[target_feature(enable = "neon")]
2908+
// The codegen check expects `saddlp` here even though the linked intrinsic is
// named `saddlv`. NOTE(review): presumably a two-lane reduction is emitted as
// a single pairwise add — confirm against the Arm ISA reference.
#[cfg_attr(test, assert_instr(saddlp))]
2909+
pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 {
2910+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2911+
extern "C" {
2912+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")]
2913+
fn vaddlv_s32_(a: int32x2_t) -> i64;
2914+
}
2915+
vaddlv_s32_(a)
2916+
}
2917+
2918+
/// Signed Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.saddlv.i64.v4i32`
/// and returns that intrinsic's `i64` result unchanged.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2919+
#[inline]
2920+
#[target_feature(enable = "neon")]
2921+
#[cfg_attr(test, assert_instr(saddlv))]
2922+
pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 {
2923+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2924+
extern "C" {
2925+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v4i32")]
2926+
fn vaddlvq_s32_(a: int32x4_t) -> i64;
2927+
}
2928+
vaddlvq_s32_(a)
2929+
}
2930+
2931+
/// Unsigned Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.uaddlv.i32.v4i16`
/// and returns its result unchanged. The binding is declared on the Rust
/// side as returning `u32`.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2932+
#[inline]
2933+
#[target_feature(enable = "neon")]
2934+
#[cfg_attr(test, assert_instr(uaddlv))]
2935+
pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 {
2936+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2937+
extern "C" {
2938+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v4i16")]
2939+
fn vaddlv_u16_(a: uint16x4_t) -> u32;
2940+
}
2941+
vaddlv_u16_(a)
2942+
}
2943+
2944+
/// Unsigned Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.uaddlv.i32.v8i16`
/// and returns its result unchanged. The binding is declared on the Rust
/// side as returning `u32`.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2945+
#[inline]
2946+
#[target_feature(enable = "neon")]
2947+
#[cfg_attr(test, assert_instr(uaddlv))]
2948+
pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 {
2949+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2950+
extern "C" {
2951+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v8i16")]
2952+
fn vaddlvq_u16_(a: uint16x8_t) -> u32;
2953+
}
2954+
vaddlvq_u16_(a)
2955+
}
2956+
2957+
/// Unsigned Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.uaddlv.i64.v2i32`
/// and returns its result unchanged. The binding is declared on the Rust
/// side as returning `u64`.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2958+
#[inline]
2959+
#[target_feature(enable = "neon")]
2960+
// The codegen check expects `uaddlp` here even though the linked intrinsic is
// named `uaddlv`. NOTE(review): presumably a two-lane reduction is emitted as
// a single pairwise add — confirm against the Arm ISA reference.
#[cfg_attr(test, assert_instr(uaddlp))]
2961+
pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 {
2962+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2963+
extern "C" {
2964+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v2i32")]
2965+
fn vaddlv_u32_(a: uint32x2_t) -> u64;
2966+
}
2967+
vaddlv_u32_(a)
2968+
}
2969+
2970+
/// Unsigned Add Long across Vector
///
/// Forwards `a` to the LLVM intrinsic `llvm.aarch64.neon.uaddlv.i64.v4i32`
/// and returns its result unchanged. The binding is declared on the Rust
/// side as returning `u64`.
///
/// # Safety
///
/// This is an `unsafe fn` compiled with `#[target_feature(enable = "neon")]`;
/// presumably the caller must guarantee NEON is available — TODO confirm
/// against the crate-level safety notes.
2971+
#[inline]
2972+
#[target_feature(enable = "neon")]
2973+
#[cfg_attr(test, assert_instr(uaddlv))]
2974+
pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 {
2975+
// NOTE(review): the lint is presumably silenced because the vector
// argument type is not regarded as FFI-safe — confirm.
#[allow(improper_ctypes)]
2976+
extern "C" {
2977+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v4i32")]
2978+
fn vaddlvq_u32_(a: uint32x4_t) -> u64;
2979+
}
2980+
vaddlvq_u32_(a)
2981+
}
2982+
28792983
/// Signed Subtract Wide
28802984
#[inline]
28812985
#[target_feature(enable = "neon")]
@@ -8458,6 +8562,70 @@ mod test {
84588562
assert_eq!(r, e);
84598563
}
84608564

8565+
// Checks `vaddlv_s16` on lanes (1, 2, 3, 4); expected total is 1 + 2 + 3 + 4 = 10.
// NOTE(review): every lane is positive, so this input would not tell a signed
// reduction apart from an unsigned one.
#[simd_test(enable = "neon")]
8566+
unsafe fn test_vaddlv_s16() {
8567+
let a: i16x4 = i16x4::new(1, 2, 3, 4);
8568+
let e: i32 = 10;
8569+
// The inner `transmute` hands the `i16x4` test vector to an `int16x4_t`
// parameter; the outer one is an `i32` -> `i32` identity.
let r: i32 = transmute(vaddlv_s16(transmute(a)));
8570+
assert_eq!(r, e);
8571+
}
8572+
8573+
// Checks `vaddlvq_s16` on lanes 1 through 8; expected total is 36.
// NOTE(review): every lane is positive, so this input would not tell a signed
// reduction apart from an unsigned one.
#[simd_test(enable = "neon")]
8574+
unsafe fn test_vaddlvq_s16() {
8575+
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
8576+
let e: i32 = 36;
8577+
// The inner `transmute` hands the `i16x8` test vector to an `int16x8_t`
// parameter; the outer one is an `i32` -> `i32` identity.
let r: i32 = transmute(vaddlvq_s16(transmute(a)));
8578+
assert_eq!(r, e);
8579+
}
8580+
8581+
// Checks `vaddlv_s32` on lanes (1, 2); expected total is 3.
// NOTE(review): every lane is positive, so this input would not tell a signed
// reduction apart from an unsigned one.
#[simd_test(enable = "neon")]
8582+
unsafe fn test_vaddlv_s32() {
8583+
let a: i32x2 = i32x2::new(1, 2);
8584+
let e: i64 = 3;
8585+
// The inner `transmute` hands the `i32x2` test vector to an `int32x2_t`
// parameter; the outer one is an `i64` -> `i64` identity.
let r: i64 = transmute(vaddlv_s32(transmute(a)));
8586+
assert_eq!(r, e);
8587+
}
8588+
8589+
// Checks `vaddlvq_s32` on lanes (1, 2, 3, 4); expected total is 10.
// NOTE(review): every lane is positive, so this input would not tell a signed
// reduction apart from an unsigned one.
#[simd_test(enable = "neon")]
8590+
unsafe fn test_vaddlvq_s32() {
8591+
let a: i32x4 = i32x4::new(1, 2, 3, 4);
8592+
let e: i64 = 10;
8593+
// The inner `transmute` hands the `i32x4` test vector to an `int32x4_t`
// parameter; the outer one is an `i64` -> `i64` identity.
let r: i64 = transmute(vaddlvq_s32(transmute(a)));
8594+
assert_eq!(r, e);
8595+
}
8596+
8597+
// Checks `vaddlv_u16` on lanes (1, 2, 3, 4); expected total is 10.
#[simd_test(enable = "neon")]
8598+
unsafe fn test_vaddlv_u16() {
8599+
let a: u16x4 = u16x4::new(1, 2, 3, 4);
8600+
let e: u32 = 10;
8601+
// The inner `transmute` hands the `u16x4` test vector to a `uint16x4_t`
// parameter; the outer one is a `u32` -> `u32` identity.
let r: u32 = transmute(vaddlv_u16(transmute(a)));
8602+
assert_eq!(r, e);
8603+
}
8604+
8605+
// Checks `vaddlvq_u16` on lanes 1 through 8; expected total is 36.
#[simd_test(enable = "neon")]
8606+
unsafe fn test_vaddlvq_u16() {
8607+
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
8608+
let e: u32 = 36;
8609+
// The inner `transmute` hands the `u16x8` test vector to a `uint16x8_t`
// parameter; the outer one is a `u32` -> `u32` identity.
let r: u32 = transmute(vaddlvq_u16(transmute(a)));
8610+
assert_eq!(r, e);
8611+
}
8612+
8613+
// Checks `vaddlv_u32` on lanes (1, 2); expected total is 3.
#[simd_test(enable = "neon")]
8614+
unsafe fn test_vaddlv_u32() {
8615+
let a: u32x2 = u32x2::new(1, 2);
8616+
let e: u64 = 3;
8617+
// The inner `transmute` hands the `u32x2` test vector to a `uint32x2_t`
// parameter; the outer one is a `u64` -> `u64` identity.
let r: u64 = transmute(vaddlv_u32(transmute(a)));
8618+
assert_eq!(r, e);
8619+
}
8620+
8621+
// Checks `vaddlvq_u32` on lanes (1, 2, 3, 4); expected total is 10.
#[simd_test(enable = "neon")]
8622+
unsafe fn test_vaddlvq_u32() {
8623+
let a: u32x4 = u32x4::new(1, 2, 3, 4);
8624+
let e: u64 = 10;
8625+
// The inner `transmute` hands the `u32x4` test vector to a `uint32x4_t`
// parameter; the outer one is a `u64` -> `u64` identity.
let r: u64 = transmute(vaddlvq_u32(transmute(a)));
8626+
assert_eq!(r, e);
8627+
}
8628+
84618629
#[simd_test(enable = "neon")]
84628630
unsafe fn test_vsubw_high_s8() {
84638631
let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,15 @@ extern "C" {
147147
#[link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"]
148148
fn vaddvq_u64_(a: uint64x2_t) -> u64;
149149

150+
// LLVM bindings for the 8-bit add-long-across-vector reductions. Each one is
// declared with a 32-bit result (`i32` / `u32`); the public `vaddlv*_s8` and
// `vaddlv*_u8` wrappers narrow that result to 16 bits with an `as` cast.
#[link_name = "llvm.aarch64.neon.saddlv.i32.v8i8"]
151+
fn vaddlv_s8_(a: int8x8_t) -> i32;
152+
#[link_name = "llvm.aarch64.neon.uaddlv.i32.v8i8"]
153+
fn vaddlv_u8_(a: uint8x8_t) -> u32;
154+
#[link_name = "llvm.aarch64.neon.saddlv.i32.v16i8"]
155+
fn vaddlvq_s8_(a: int8x16_t) -> i32;
156+
#[link_name = "llvm.aarch64.neon.uaddlv.i32.v16i8"]
157+
fn vaddlvq_u8_(a: uint8x16_t) -> u32;
158+
150159
#[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"]
151160
fn vmaxv_s8_(a: int8x8_t) -> i8;
152161
#[link_name = "llvm.aarch64.neon.smaxv.i8.6i8"]
@@ -1000,6 +1009,35 @@ pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
10001009
vaddvq_u64_(a)
10011010
}
10021011

1012+
/// Signed Add Long across Vector
1013+
#[inline]
1014+
#[target_feature(enable = "neon")]
1015+
#[cfg_attr(test, assert_instr(saddlv))]
1016+
pub unsafe fn vaddlv_s8(a: int8x8_t) -> i16 {
1017+
vaddlv_s8_(a) as i16
1018+
}
1019+
/// Signed Add Long across Vector
1020+
#[inline]
1021+
#[target_feature(enable = "neon")]
1022+
#[cfg_attr(test, assert_instr(saddlv))]
1023+
pub unsafe fn vaddlvq_s8(a: int8x16_t) -> i16 {
1024+
vaddlvq_s8_(a) as i16
1025+
}
1026+
/// Unsigned Add Long across Vector
1027+
#[inline]
1028+
#[target_feature(enable = "neon")]
1029+
#[cfg_attr(test, assert_instr(uaddlv))]
1030+
pub unsafe fn vaddlv_u8(a: uint8x8_t) -> u16 {
1031+
vaddlv_u8_(a) as u16
1032+
}
1033+
/// Unsigned Add Long across Vector
1034+
#[inline]
1035+
#[target_feature(enable = "neon")]
1036+
#[cfg_attr(test, assert_instr(uaddlv))]
1037+
pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
1038+
vaddlvq_u8_(a) as u16
1039+
}
1040+
10031041
/// Polynomial multiply long
10041042
#[inline]
10051043
#[target_feature(enable = "neon")]
@@ -4367,6 +4405,35 @@ mod tests {
43674405
let e = 3_u64;
43684406
assert_eq!(r, e);
43694407
}
4408+
4409+
#[simd_test(enable = "neon")]
4410+
unsafe fn test_vaddlv_s8() {
4411+
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
4412+
let r: i16 = vaddlv_s8(transmute(a));
4413+
let e = 20_i16;
4414+
assert_eq!(r, e);
4415+
}
4416+
#[simd_test(enable = "neon")]
4417+
unsafe fn test_vaddlv_u8() {
4418+
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
4419+
let r: u16 = vaddlv_u8(transmute(a));
4420+
let e = 36_u16;
4421+
assert_eq!(r, e);
4422+
}
4423+
#[simd_test(enable = "neon")]
4424+
unsafe fn test_vaddlvq_s8() {
4425+
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
4426+
let r: i16 = vaddlvq_s8(transmute(a));
4427+
let e = 104_i16;
4428+
assert_eq!(r, e);
4429+
}
4430+
#[simd_test(enable = "neon")]
4431+
unsafe fn test_vaddlvq_u8() {
4432+
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
4433+
let r: u16 = vaddlvq_u8(transmute(a));
4434+
let e = 136_u16;
4435+
assert_eq!(r, e);
4436+
}
43704437
}
43714438

43724439
#[cfg(test)]

crates/stdarch-gen/neon.spec

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1655,6 +1655,78 @@ generate float64x*_t
16551655
arm = vsub.
16561656
generate float*_t
16571657

1658+
/// Signed Add Long across Vector
1659+
name = vaddlv
1660+
a = 1, 2, 3, 4
1661+
validate 10
1662+
1663+
aarch64 = saddlv
1664+
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
1665+
generate int16x4_t:i32
1666+
1667+
/// Signed Add Long across Vector
1668+
name = vaddlv
1669+
a = 1, 2, 3, 4, 5, 6, 7, 8
1670+
validate 36
1671+
1672+
aarch64 = saddlv
1673+
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
1674+
generate int16x8_t:i32
1675+
1676+
/// Signed Add Long across Vector
1677+
name = vaddlv
1678+
a = 1, 2
1679+
validate 3
1680+
1681+
aarch64 = saddlp
1682+
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
1683+
generate int32x2_t:i64
1684+
1685+
/// Signed Add Long across Vector
1686+
name = vaddlv
1687+
a = 1, 2, 3, 4
1688+
validate 10
1689+
1690+
aarch64 = saddlv
1691+
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
1692+
generate int32x4_t:i64
1693+
1694+
/// Unsigned Add Long across Vector
1695+
name = vaddlv
1696+
a = 1, 2, 3, 4
1697+
validate 10
1698+
1699+
aarch64 = uaddlv
1700+
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
1701+
generate uint16x4_t:u32
1702+
1703+
/// Unsigned Add Long across Vector
1704+
name = vaddlv
1705+
a = 1, 2, 3, 4, 5, 6, 7, 8
1706+
validate 36
1707+
1708+
aarch64 = uaddlv
1709+
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
1710+
generate uint16x8_t:u32
1711+
1712+
/// Unsigned Add Long across Vector
1713+
name = vaddlv
1714+
a = 1, 2
1715+
validate 3
1716+
1717+
aarch64 = uaddlp
1718+
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
1719+
generate uint32x2_t:u64
1720+
1721+
/// Unsigned Add Long across Vector
1722+
name = vaddlv
1723+
a = 1, 2, 3, 4
1724+
validate 10
1725+
1726+
aarch64 = uaddlv
1727+
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
1728+
generate uint32x4_t:u64
1729+
16581730
/// Subtract returning high narrow
16591731
name = vsubhn
16601732
no-q

0 commit comments

Comments
 (0)