Skip to content

add vneg neon instructions #1087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1601,6 +1601,42 @@ pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
vcvtpq_u64_f64_(a)
}

/// Negate
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(neg))]
pub unsafe fn vneg_s64(a: int64x1_t) -> int64x1_t {
let b: i64x1 = i64x1::new(0);
simd_sub(transmute(b), a)
Comment on lines +1609 to +1610
Copy link
Contributor

@CryZe CryZe Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I was actually looking into this one as well earlier and "gave up on it", because there didn't seem to be a way other than 0 - the_vec. Good to see I didn't miss anything xD

Though that still raises the question of whether there should just be a simd_neg instead, because that's how you are supposed to get to this instruction via LLVM.

Copy link
Member Author

@SparrowLii SparrowLii Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that core_arch does not have an intrinsic such as simd_neg. I don’t know how to add it, since I don’t know the compiler very well. :(

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the guidance. I will try later. I think the sub(0, a) implementation is sufficient for now, since the implementation in Clang is the same.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are different if you look at the code for floating-point numbers. 0.0 - x and -x have different behavior for floats with regards to negative zeros, NaNs, etc.

Copy link
Member Author

@SparrowLii SparrowLii Mar 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing that out. It looks like we do need to add simd_neg.

}

/// Negate
///
/// Subtracts `a` from an all-zero `i64x2` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(neg))]
pub unsafe fn vnegq_s64(a: int64x2_t) -> int64x2_t {
    let zero = i64x2::new(0, 0);
    simd_sub(transmute(zero), a)
}

/// Floating-point negate
///
/// Implemented as `(-0.0) - a` rather than `(+0.0) - a`. Under IEEE 754,
/// `(+0.0) - (+0.0)` is `+0.0`, so subtracting from positive zero would not
/// flip the sign of a `+0.0` input. Subtracting from negative zero yields
/// `-0.0` for `+0.0` and `+0.0` for `-0.0`, as a negate should.
///
/// The sign of a NaN result may still differ from a pure sign-bit flip.
// NOTE(review): `-0.0 - a` is expected to be selected as `fneg`; confirm
// with the assert_instr test run.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fneg))]
pub unsafe fn vneg_f64(a: float64x1_t) -> float64x1_t {
    let neg_zero: f64 = -0.;
    simd_sub(transmute(neg_zero), a)
}

/// Floating-point negate
///
/// Implemented as `(-0.0) - a` rather than `(+0.0) - a`. Under IEEE 754,
/// `(+0.0) - (+0.0)` is `+0.0`, so subtracting from positive zero would not
/// flip the sign of a `+0.0` lane. Subtracting from negative zero yields
/// `-0.0` for `+0.0` and `+0.0` for `-0.0`, as a negate should.
///
/// The sign of a NaN result may still differ from a pure sign-bit flip.
// NOTE(review): `-0.0 - a` is expected to be selected as `fneg`; confirm
// with the assert_instr test run.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fneg))]
pub unsafe fn vnegq_f64(a: float64x2_t) -> float64x2_t {
    let neg_zero: f64x2 = f64x2::new(-0., -0.);
    simd_sub(transmute(neg_zero), a)
}

/// Multiply
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -3206,6 +3242,38 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vneg_s64() {
    // Negating -7 must produce 7.
    let input = i64x1::new(-7);
    let expected = i64x1::new(7);
    let actual: i64x1 = transmute(vneg_s64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vnegq_s64() {
    // Both lanes are negated independently.
    let input = i64x2::new(-7, -6);
    let expected = i64x2::new(7, 6);
    let actual: i64x2 = transmute(vnegq_s64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vneg_f64() {
    // Negating -3.0 must produce 3.0.
    let input: f64 = -3.;
    let expected: f64 = 3.;
    let actual: f64 = transmute(vneg_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vnegq_f64() {
    // Both lanes are negated independently.
    let input = f64x2::new(-3., -2.);
    let expected = f64x2::new(3., 2.);
    let actual: f64x2 = transmute(vnegq_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;
Expand Down
152 changes: 152 additions & 0 deletions crates/core_arch/src/arm/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1981,6 +1981,94 @@ pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
simd_cast(a)
}

/// Negate
///
/// Subtracts `a` from an all-zero `i8x8` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
    let zero = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_sub(transmute(zero), a)
}

/// Negate
///
/// Subtracts `a` from an all-zero `i8x16` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
    let zero = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd_sub(transmute(zero), a)
}

/// Negate
///
/// Subtracts `a` from an all-zero `i16x4` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
    let zero = i16x4::new(0, 0, 0, 0);
    simd_sub(transmute(zero), a)
}

/// Negate
///
/// Subtracts `a` from an all-zero `i16x8` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
    let zero = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_sub(transmute(zero), a)
}

/// Negate
///
/// Subtracts `a` from an all-zero `i32x2` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t {
    let zero = i32x2::new(0, 0);
    simd_sub(transmute(zero), a)
}

/// Negate
///
/// Subtracts `a` from an all-zero `i32x4` vector via `simd_sub`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t {
    let zero = i32x4::new(0, 0, 0, 0);
    simd_sub(transmute(zero), a)
}

/// Floating-point negate
///
/// Implemented as `(-0.0) - a` rather than `(+0.0) - a`. Under IEEE 754,
/// `(+0.0) - (+0.0)` is `+0.0`, so subtracting from positive zero would not
/// flip the sign of a `+0.0` lane. Subtracting from negative zero yields
/// `-0.0` for `+0.0` and `+0.0` for `-0.0`, as a negate should.
///
/// The sign of a NaN result may still differ from a pure sign-bit flip.
// NOTE(review): `-0.0 - a` is expected to be selected as a negate
// instruction (`vneg` on arm, `fneg` on aarch64); confirm with the
// assert_instr test run.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))]
pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t {
    let neg_zero: f32x2 = f32x2::new(-0., -0.);
    simd_sub(transmute(neg_zero), a)
}

/// Floating-point negate
///
/// Implemented as `(-0.0) - a` rather than `(+0.0) - a`. Under IEEE 754,
/// `(+0.0) - (+0.0)` is `+0.0`, so subtracting from positive zero would not
/// flip the sign of a `+0.0` lane. Subtracting from negative zero yields
/// `-0.0` for `+0.0` and `+0.0` for `-0.0`, as a negate should.
///
/// The sign of a NaN result may still differ from a pure sign-bit flip.
// NOTE(review): `-0.0 - a` is expected to be selected as a negate
// instruction (`vneg` on arm, `fneg` on aarch64); confirm with the
// assert_instr test run.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vneg))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))]
pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
    let neg_zero: f32x4 = f32x4::new(-0., -0., -0., -0.);
    simd_sub(transmute(neg_zero), a)
}

/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -5557,6 +5645,70 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vneg_s8() {
    // Every lane is negated; the zero lane stays zero.
    let input = i8x8::new(-7, -6, -5, -4, -3, -2, -1, 0);
    let expected = i8x8::new(7, 6, 5, 4, 3, 2, 1, 0);
    let actual: i8x8 = transmute(vneg_s8(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vnegq_s8() {
    // Covers negative, zero and positive lanes.
    let input = i8x16::new(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8);
    let expected = i8x16::new(7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8);
    let actual: i8x16 = transmute(vnegq_s8(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vneg_s16() {
    // Every lane is negated.
    let input = i16x4::new(-7, -6, -5, -4);
    let expected = i16x4::new(7, 6, 5, 4);
    let actual: i16x4 = transmute(vneg_s16(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vnegq_s16() {
    // Every lane is negated; the zero lane stays zero.
    let input = i16x8::new(-7, -6, -5, -4, -3, -2, -1, 0);
    let expected = i16x8::new(7, 6, 5, 4, 3, 2, 1, 0);
    let actual: i16x8 = transmute(vnegq_s16(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vneg_s32() {
    // Both lanes are negated.
    let input = i32x2::new(-7, -6);
    let expected = i32x2::new(7, 6);
    let actual: i32x2 = transmute(vneg_s32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vnegq_s32() {
    // Every lane is negated.
    let input = i32x4::new(-7, -6, -5, -4);
    let expected = i32x4::new(7, 6, 5, 4);
    let actual: i32x4 = transmute(vnegq_s32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vneg_f32() {
    // Both lanes are negated.
    let input = f32x2::new(-3., -2.);
    let expected = f32x2::new(3., 2.);
    let actual: f32x2 = transmute(vneg_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vnegq_f32() {
    // Every lane is negated; the zero lane compares equal to zero.
    let input = f32x4::new(-3., -2., -1., 0.);
    let expected = f32x4::new(3., 2., 1., 0.);
    let actual: f32x4 = transmute(vnegq_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vqsub_u8() {
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
Expand Down
28 changes: 28 additions & 0 deletions crates/stdarch-gen/neon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,34 @@ aarch64 = fcvtpu
link-aarch64 = fcvtpu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

// Integer negate, expressed as `0 - a`: `fixed` supplies the all-zero values
// bound to `b`, and `simd_sub` subtracts `a` from them.
/// Negate
name = vneg
multi_fn = fixed, b:in_t
multi_fn = simd_sub, transmute(b), a
a = -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8

aarch64 = neg
generate int64x*_t

arm = vneg
generate int*_t

// Floating-point negate, expressed as `-0.0 - a`. The constant must be
// negative zero: `(+0.0) - (+0.0)` is `+0.0` under IEEE 754, so subtracting
// from positive zero would not flip the sign of a `+0.0` lane.
// NOTE(review): `-0.0 - a` is expected to be selected as a negate instruction
// (`fneg` / `vneg`); confirm with the assert_instr test run.
/// Floating-point negate
name = vneg
multi_fn = fixed, b:in_t
multi_fn = simd_sub, transmute(b), a
a = -3., -2., -1., 0., 1., 2., 3., 4.
fixed = -0., -0., -0., -0., -0., -0., -0., -0.
validate 3., 2., 1., 0., -1., -2., -3., -4.

aarch64 = fneg
generate float64x*_t

arm = vneg
generate float*_t

/// Saturating subtract
name = vqsub
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
Expand Down