Skip to content

Commit 17c28cf

Browse files
committed
Add bitwise equality operations
1 parent 39be83a commit 17c28cf

File tree

2 files changed

+210
-13
lines changed

2 files changed

+210
-13
lines changed

crates/core_arch/src/aarch64/neon.rs

+92
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,49 @@ extern "C" {
232232
) -> int8x16_t;
233233
}
234234

235+
// Generates a public two-operand NEON intrinsic wrapper.
//
// The expansion is `pub unsafe fn $name(a: $type, b: $type) -> $type`, which simply
// forwards to `$simd_fn(a, b)`. Under `cfg(test)` the function is also annotated with
// `assert_instr($intr)`.
//
// Optional attributes (typically `///` doc comments) may be written before `$name`.
// They are re-emitted on the generated function: attributes placed on the macro
// *invocation* are not applied to the items it expands to, so documentation has to
// travel through the macro to end up on the function.
macro_rules! aarch64_simd_2 {
    ($(#[$meta:meta])* $name:ident, $type:ty, $simd_fn:ident, $intr:ident) => {
        $(#[$meta])*
        #[inline]
        #[target_feature(enable = "neon")]
        #[cfg_attr(test, assert_instr($intr))]
        pub unsafe fn $name(a: $type, b: $type) -> $type {
            $simd_fn(a, b)
        }
    };
}

// Generates an equality-comparison intrinsic named `$name` over vectors of `$type`,
// implemented with `simd_eq` and expected (under test) to emit `cmeq`.
macro_rules! aarch64_simd_ceq {
    ($name:ident, $type:ty) => {
        aarch64_simd_2!(
            // Passed as an explicit attribute so the doc text lands on the generated fn.
            #[doc = "Compare bitwise Equal (vector)"]
            $name,
            $type,
            simd_eq,
            cmeq
        );
    };
}
251+
252+
// 64-bit integer equality intrinsics, instantiated through `aarch64_simd_ceq!`.
// NOTE(review): the macro expands to functions returning `$type`, so the signed
// variants return `int64x1_t`/`int64x2_t`; the ARM signatures list
// `uint64x1_t`/`uint64x2_t` results for `vceq_s64`/`vceqq_s64` — confirm this is intended.
aarch64_simd_ceq!(vceq_s64, int64x1_t);
253+
aarch64_simd_ceq!(vceqq_s64, int64x2_t);
254+
aarch64_simd_ceq!(vceq_u64, uint64x1_t);
255+
aarch64_simd_ceq!(vceqq_u64, uint64x2_t);
256+
257+
/// Compare bitwise Equal (vector)
258+
#[inline]
259+
#[target_feature(enable = "neon")]
260+
#[cfg_attr(test, assert_instr(fcmeq))]
261+
pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t {
262+
simd_eq(a, b)
263+
}
264+
265+
/// Compare bitwise Equal (vector)
266+
#[inline]
267+
#[target_feature(enable = "neon")]
268+
#[cfg_attr(test, assert_instr(fcmeq))]
269+
pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
270+
simd_eq(a, b)
271+
}
272+
273+
// 64-bit polynomial equality intrinsics, instantiated through `aarch64_simd_ceq!`.
// NOTE(review): these use `uint64x1_t`/`uint64x2_t` operands, whereas the ARM
// signatures for `vceq_p64`/`vceqq_p64` take `poly64x1_t`/`poly64x2_t` — confirm the
// unsigned vector types are an intended stand-in.
aarch64_simd_ceq!(vceq_p64, uint64x1_t);
274+
aarch64_simd_ceq!(vceqq_p64, uint64x2_t);
275+
276+
277+
235278
/// Vector add.
236279
#[inline]
237280
#[target_feature(enable = "neon")]
@@ -1584,6 +1627,55 @@ mod tests {
15841627
assert_eq!(r, e);
15851628
}
15861629

1630+
1631+
#[simd_test(enable = "neon")]
1632+
unsafe fn test_vceq_s64() {
1633+
let a = i64x1::new(0x0001020304050607);
1634+
let b = i64x1::new(-1);
1635+
let r: i64x1 = transmute(vceq_s64(transmute(a), transmute(a)));
1636+
assert_eq!(r, b);
1637+
}
1638+
1639+
#[simd_test(enable = "neon")]
1640+
unsafe fn test_vceqq_s64() {
1641+
let a = i64x2::new(0x0001020304050607, 0x08090A0B0C0D0E0F);
1642+
let b = i64x2::new(-1, -1);
1643+
let r: i64x2 = transmute(vceqq_s64(transmute(a), transmute(a)));
1644+
assert_eq!(r, b);
1645+
}
1646+
1647+
#[simd_test(enable = "neon")]
1648+
unsafe fn test_vceq_u64() {
1649+
let a = u64x1::new(0x0001020304050607);
1650+
let b = u64x1::new(0xFFFFFFFFFFFFFFFF);
1651+
let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(a)));
1652+
assert_eq!(r, b);
1653+
}
1654+
1655+
#[simd_test(enable = "neon")]
1656+
unsafe fn test_vceqq_u64() {
1657+
let a = u64x2::new(0x0001020304050607, 0x08090A0B0C0D0E0F);
1658+
let b = u64x2::new(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF);
1659+
let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(a)));
1660+
assert_eq!(r, b);
1661+
}
1662+
1663+
1664+
#[simd_test(enable = "neon")]
1665+
unsafe fn test_vceq_f64() {
1666+
let a: f64 = 1.2;
1667+
let b = u64x1::new(0xFFFFFFFFFFFFFFFF);
1668+
let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(a)));
1669+
assert_eq!(r, b);
1670+
}
1671+
1672+
#[simd_test(enable = "neon")]
1673+
unsafe fn test_vceqq_f32() {
1674+
let a = f64x2::new(1.2, 3.4);
1675+
let b = u64x2::new(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF);
1676+
let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(a)));
1677+
assert_eq!(r, b);
1678+
}
15871679
#[simd_test(enable = "neon")]
15881680
unsafe fn test_vmaxv_s8() {
15891681
let r = vmaxv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5)));

crates/core_arch/src/arm/neon.rs

+118-13
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,7 @@ arm_simd_eor!(veorq_u64, uint64x2_t);
784784

785785
// Generates an equality-comparison intrinsic `$name` over `$type` by forwarding to
// `arm_simd_2!` with `simd_eq` and `cmeq` (passed twice).
// NOTE(review): the doc comment inside the arm sits on a macro invocation; unless
// `arm_simd_2!` forwards it, it is presumably not attached to the generated fn — confirm.
macro_rules! arm_simd_ceq {
786786
($name:ident, $type:ty) => {
787-
/// Vector bitwise exclusive or (vector).
787+
/// Compare bitwise Equal (vector)
788788
arm_simd_2!($name, $type, simd_eq, cmeq, cmeq);
789789
};
790790
}
@@ -802,7 +802,6 @@ arm_simd_ceq!(vceqq_u16, uint16x8_t);
802802
arm_simd_ceq!(vceq_u32, uint32x2_t);
803803
arm_simd_ceq!(vceqq_u32, uint32x4_t);
804804

805-
806805
// arm_simd_ceq!(vceq_f32, float32x2_t); // we have a different return type
807806
#[inline]
808807
#[target_feature(enable = "neon")]
@@ -826,16 +825,6 @@ pub unsafe fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
826825
arm_simd_ceq!(vceq_p8, poly8x8_t);
827826
arm_simd_ceq!(vceqq_p8, poly8x16_t);
828827

829-
// TODO:
830-
// uint64x1_t vceq_s64 (int64x1_t a, int64x1_t b)Compare bitwise equal
831-
// uint64x2_t vceqq_s64 (int64x2_t a, int64x2_t b)Compare bitwise equal
832-
// uint64x1_t vceq_u64 (uint64x1_t a, uint64x1_t b)Compare bitwise equal
833-
// uint64x2_t vceqq_u64 (uint64x2_t a, uint64x2_t b)Compare bitwise equal
834-
// uint64x1_t vceq_p64 (poly64x1_t a, poly64x1_t b)Compare bitwise equal
835-
// uint64x2_t vceqq_p64 (poly64x2_t a, poly64x2_t b)Compare bitwise equal
836-
// uint64x1_t vceq_f64 (float64x1_t a, float64x1_t b)Floating-point compare equal
837-
// uint64x2_t vceqq_f64 (float64x2_t a, float64x2_t b)Floating-point compare equal
838-
839828
/// Folding minimum of adjacent pairs
840829
#[inline]
841830
#[target_feature(enable = "neon")]
@@ -1842,7 +1831,7 @@ mod tests {
18421831
assert_eq!(r, a);
18431832
}
18441833

1845-
#[simd_test(enable = "neon")]
1834+
#[simd_test(enable = "neon")]
18461835
unsafe fn test_veor_s8() {
18471836
let a = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
18481837
let b = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
@@ -1970,6 +1959,122 @@ mod tests {
19701959
assert_eq!(r, b);
19711960
}
19721961

1962+
#[simd_test(enable = "neon")]
1963+
unsafe fn test_vceq_s8() {
1964+
let a = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
1965+
let b = i8x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
1966+
let r: i8x8 = transmute(vceq_s8(transmute(a), transmute(a)));
1967+
assert_eq!(r, b);
1968+
}
1969+
1970+
#[simd_test(enable = "neon")]
1971+
unsafe fn test_vceqq_s8() {
1972+
let a = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
1973+
let b = i8x16::new(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
1974+
let r: i8x16 = transmute(vceqq_s8(transmute(a), transmute(a)));
1975+
assert_eq!(r, b);
1976+
}
1977+
1978+
#[simd_test(enable = "neon")]
1979+
unsafe fn test_vceq_s16() {
1980+
let a = i16x4::new(0x0001, 0x0203, 0x0405, 0x0607);
1981+
let b = i16x4::new(-1, -1, -1, -1);
1982+
let r: i16x4 = transmute(vceq_s16(transmute(a), transmute(a)));
1983+
assert_eq!(r, b);
1984+
}
1985+
1986+
#[simd_test(enable = "neon")]
1987+
unsafe fn test_vceqq_s16() {
1988+
let a = i16x8::new(0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F);
1989+
let b = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
1990+
let r: i16x8 = transmute(vceqq_s16(transmute(a), transmute(a)));
1991+
assert_eq!(r, b);
1992+
}
1993+
1994+
#[simd_test(enable = "neon")]
1995+
unsafe fn test_vceq_s32() {
1996+
let a = i32x2::new(0x00010203, 0x04050607);
1997+
let b = i32x2::new(-1, -1);
1998+
let r: i32x2 = transmute(vceq_s32(transmute(a), transmute(a)));
1999+
assert_eq!(r, b);
2000+
}
2001+
2002+
#[simd_test(enable = "neon")]
2003+
unsafe fn test_vceqq_s32() {
2004+
let a = i32x4::new(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F);
2005+
let b = i32x4::new(-1, -1, -1, -1);
2006+
let r: i32x4 = transmute(vceqq_s32(transmute(a), transmute(a)));
2007+
assert_eq!(r, b);
2008+
}
2009+
2010+
2011+
2012+
#[simd_test(enable = "neon")]
2013+
unsafe fn test_vceq_u8() {
2014+
let a = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
2015+
let b = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2016+
let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(a)));
2017+
assert_eq!(r, b);
2018+
}
2019+
2020+
#[simd_test(enable = "neon")]
2021+
unsafe fn test_vceqq_u8() {
2022+
let a = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F);
2023+
let b = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2024+
let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(a)));
2025+
assert_eq!(r, b);
2026+
}
2027+
2028+
#[simd_test(enable = "neon")]
2029+
unsafe fn test_vceq_u16() {
2030+
let a = u16x4::new(0x0001, 0x0203, 0x0405, 0x0607);
2031+
let b = u16x4::new(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2032+
let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(a)));
2033+
assert_eq!(r, b);
2034+
}
2035+
2036+
#[simd_test(enable = "neon")]
2037+
unsafe fn test_vceqq_u16() {
2038+
let a = u16x8::new(0x0001, 0x0203, 0x0405, 0x0607, 0x0809, 0x0A0B, 0x0C0D, 0x0E0F);
2039+
let b = u16x8::new(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2040+
let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(a)));
2041+
assert_eq!(r, b);
2042+
}
2043+
2044+
#[simd_test(enable = "neon")]
2045+
unsafe fn test_vceq_u32() {
2046+
let a = u32x2::new(0x00010203, 0x04050607);
2047+
let b = u32x2::new(0xFFFFFFFF, 0xFFFFFFFF);
2048+
let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(a)));
2049+
assert_eq!(r, b);
2050+
}
2051+
2052+
#[simd_test(enable = "neon")]
2053+
unsafe fn test_vceqq_u32() {
2054+
let a = u32x4::new(0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F);
2055+
let b = u32x4::new(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
2056+
let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(a)));
2057+
assert_eq!(r, b);
2058+
}
2059+
2060+
2061+
#[simd_test(enable = "neon")]
2062+
unsafe fn test_vceq_f32() {
2063+
let a = f32x2::new(1.2, 2.3);
2064+
let b = u32x2::new(0xFFFFFFFF, 0xFFFFFFFF);
2065+
let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(a)));
2066+
assert_eq!(r, b);
2067+
}
2068+
2069+
#[simd_test(enable = "neon")]
2070+
unsafe fn test_vceqq_f32() {
2071+
let a = f32x4::new(1.2, 3.4, 5.6, 7.8);
2072+
let b = u32x4::new(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
2073+
let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(a)));
2074+
assert_eq!(r, b);
2075+
}
2076+
2077+
19732078
#[simd_test(enable = "neon")]
19742079
unsafe fn test_vmovn_s16() {
19752080
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);

0 commit comments

Comments
 (0)