Skip to content

Commit 5260eb1

Browse files
authored
add neon instruction vsubw_* and vsubl_* (#1112)
1 parent 768b238 commit 5260eb1

File tree

3 files changed

+696
-0
lines changed

3 files changed

+696
-0
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

+234
Original file line numberDiff line numberDiff line change
@@ -2512,6 +2512,132 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
25122512
simd_sub(a, b)
25132513
}
25142514

2515+
/// Signed Subtract Wide
2516+
#[inline]
2517+
#[target_feature(enable = "neon")]
2518+
#[cfg_attr(test, assert_instr(ssubw))]
2519+
pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
2520+
let c: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2521+
simd_sub(a, simd_cast(c))
2522+
}
2523+
2524+
/// Signed Subtract Wide
2525+
#[inline]
2526+
#[target_feature(enable = "neon")]
2527+
#[cfg_attr(test, assert_instr(ssubw))]
2528+
pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
2529+
let c: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2530+
simd_sub(a, simd_cast(c))
2531+
}
2532+
2533+
/// Signed Subtract Wide
2534+
#[inline]
2535+
#[target_feature(enable = "neon")]
2536+
#[cfg_attr(test, assert_instr(ssubw))]
2537+
pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
2538+
let c: int32x2_t = simd_shuffle2(b, b, [2, 3]);
2539+
simd_sub(a, simd_cast(c))
2540+
}
2541+
2542+
/// Unsigned Subtract Wide
2543+
#[inline]
2544+
#[target_feature(enable = "neon")]
2545+
#[cfg_attr(test, assert_instr(usubw))]
2546+
pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
2547+
let c: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2548+
simd_sub(a, simd_cast(c))
2549+
}
2550+
2551+
/// Unsigned Subtract Wide
2552+
#[inline]
2553+
#[target_feature(enable = "neon")]
2554+
#[cfg_attr(test, assert_instr(usubw))]
2555+
pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
2556+
let c: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2557+
simd_sub(a, simd_cast(c))
2558+
}
2559+
2560+
/// Unsigned Subtract Wide
2561+
#[inline]
2562+
#[target_feature(enable = "neon")]
2563+
#[cfg_attr(test, assert_instr(usubw))]
2564+
pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
2565+
let c: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
2566+
simd_sub(a, simd_cast(c))
2567+
}
2568+
2569+
/// Signed Subtract Long
2570+
#[inline]
2571+
#[target_feature(enable = "neon")]
2572+
#[cfg_attr(test, assert_instr(ssubl))]
2573+
pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
2574+
let c: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
2575+
let d: int16x8_t = simd_cast(c);
2576+
let e: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2577+
let f: int16x8_t = simd_cast(e);
2578+
simd_sub(d, f)
2579+
}
2580+
2581+
/// Signed Subtract Long
2582+
#[inline]
2583+
#[target_feature(enable = "neon")]
2584+
#[cfg_attr(test, assert_instr(ssubl))]
2585+
pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
2586+
let c: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
2587+
let d: int32x4_t = simd_cast(c);
2588+
let e: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2589+
let f: int32x4_t = simd_cast(e);
2590+
simd_sub(d, f)
2591+
}
2592+
2593+
/// Signed Subtract Long
2594+
#[inline]
2595+
#[target_feature(enable = "neon")]
2596+
#[cfg_attr(test, assert_instr(ssubl))]
2597+
pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
2598+
let c: int32x2_t = simd_shuffle2(a, a, [2, 3]);
2599+
let d: int64x2_t = simd_cast(c);
2600+
let e: int32x2_t = simd_shuffle2(b, b, [2, 3]);
2601+
let f: int64x2_t = simd_cast(e);
2602+
simd_sub(d, f)
2603+
}
2604+
2605+
/// Unsigned Subtract Long
2606+
#[inline]
2607+
#[target_feature(enable = "neon")]
2608+
#[cfg_attr(test, assert_instr(usubl))]
2609+
pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
2610+
let c: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
2611+
let d: uint16x8_t = simd_cast(c);
2612+
let e: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2613+
let f: uint16x8_t = simd_cast(e);
2614+
simd_sub(d, f)
2615+
}
2616+
2617+
/// Unsigned Subtract Long
2618+
#[inline]
2619+
#[target_feature(enable = "neon")]
2620+
#[cfg_attr(test, assert_instr(usubl))]
2621+
pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
2622+
let c: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
2623+
let d: uint32x4_t = simd_cast(c);
2624+
let e: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2625+
let f: uint32x4_t = simd_cast(e);
2626+
simd_sub(d, f)
2627+
}
2628+
2629+
/// Unsigned Subtract Long
2630+
#[inline]
2631+
#[target_feature(enable = "neon")]
2632+
#[cfg_attr(test, assert_instr(usubl))]
2633+
pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
2634+
let c: uint32x2_t = simd_shuffle2(a, a, [2, 3]);
2635+
let d: uint64x2_t = simd_cast(c);
2636+
let e: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
2637+
let f: uint64x2_t = simd_cast(e);
2638+
simd_sub(d, f)
2639+
}
2640+
25152641
/// Maximum (vector)
25162642
#[inline]
25172643
#[target_feature(enable = "neon")]
@@ -6459,6 +6585,114 @@ mod test {
64596585
assert_eq!(r, e);
64606586
}
64616587

6588+
#[simd_test(enable = "neon")]
6589+
unsafe fn test_vsubw_high_s8() {
6590+
let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);
6591+
let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16);
6592+
let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
6593+
let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b)));
6594+
assert_eq!(r, e);
6595+
}
6596+
6597+
#[simd_test(enable = "neon")]
6598+
unsafe fn test_vsubw_high_s16() {
6599+
let a: i32x4 = i32x4::new(8, 9, 10, 11);
6600+
let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
6601+
let e: i32x4 = i32x4::new(0, 0, 0, 0);
6602+
let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b)));
6603+
assert_eq!(r, e);
6604+
}
6605+
6606+
#[simd_test(enable = "neon")]
6607+
unsafe fn test_vsubw_high_s32() {
6608+
let a: i64x2 = i64x2::new(8, 9);
6609+
let b: i32x4 = i32x4::new(6, 7, 8, 9);
6610+
let e: i64x2 = i64x2::new(0, 0);
6611+
let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b)));
6612+
assert_eq!(r, e);
6613+
}
6614+
6615+
#[simd_test(enable = "neon")]
6616+
unsafe fn test_vsubw_high_u8() {
6617+
let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
6618+
let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6619+
let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
6620+
let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b)));
6621+
assert_eq!(r, e);
6622+
}
6623+
6624+
#[simd_test(enable = "neon")]
6625+
unsafe fn test_vsubw_high_u16() {
6626+
let a: u32x4 = u32x4::new(8, 9, 10, 11);
6627+
let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
6628+
let e: u32x4 = u32x4::new(0, 0, 0, 0);
6629+
let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b)));
6630+
assert_eq!(r, e);
6631+
}
6632+
6633+
#[simd_test(enable = "neon")]
6634+
unsafe fn test_vsubw_high_u32() {
6635+
let a: u64x2 = u64x2::new(8, 9);
6636+
let b: u32x4 = u32x4::new(6, 7, 8, 9);
6637+
let e: u64x2 = u64x2::new(0, 0);
6638+
let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b)));
6639+
assert_eq!(r, e);
6640+
}
6641+
6642+
#[simd_test(enable = "neon")]
6643+
unsafe fn test_vsubl_high_s8() {
6644+
let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6645+
let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
6646+
let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
6647+
let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b)));
6648+
assert_eq!(r, e);
6649+
}
6650+
6651+
#[simd_test(enable = "neon")]
6652+
unsafe fn test_vsubl_high_s16() {
6653+
let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
6654+
let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
6655+
let e: i32x4 = i32x4::new(4, 5, 6, 7);
6656+
let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b)));
6657+
assert_eq!(r, e);
6658+
}
6659+
6660+
#[simd_test(enable = "neon")]
6661+
unsafe fn test_vsubl_high_s32() {
6662+
let a: i32x4 = i32x4::new(12, 13, 14, 15);
6663+
let b: i32x4 = i32x4::new(6, 6, 8, 8);
6664+
let e: i64x2 = i64x2::new(6, 7);
6665+
let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b)));
6666+
assert_eq!(r, e);
6667+
}
6668+
6669+
#[simd_test(enable = "neon")]
6670+
unsafe fn test_vsubl_high_u8() {
6671+
let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6672+
let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
6673+
let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
6674+
let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b)));
6675+
assert_eq!(r, e);
6676+
}
6677+
6678+
#[simd_test(enable = "neon")]
6679+
unsafe fn test_vsubl_high_u16() {
6680+
let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
6681+
let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
6682+
let e: u32x4 = u32x4::new(4, 5, 6, 7);
6683+
let r: u32x4 = transmute(vsubl_high_u16(transmute(a), transmute(b)));
6684+
assert_eq!(r, e);
6685+
}
6686+
6687+
#[simd_test(enable = "neon")]
6688+
unsafe fn test_vsubl_high_u32() {
6689+
let a: u32x4 = u32x4::new(12, 13, 14, 15);
6690+
let b: u32x4 = u32x4::new(6, 6, 8, 8);
6691+
let e: u64x2 = u64x2::new(6, 7);
6692+
let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b)));
6693+
assert_eq!(r, e);
6694+
}
6695+
64626696
#[simd_test(enable = "neon")]
64636697
unsafe fn test_vmax_f64() {
64646698
let a: f64 = 1.0;

0 commit comments

Comments
 (0)