Skip to content

Commit d628369

Browse files
authored
Merge branch 'master' into vshl
2 parents 50fb8df + 5260eb1 commit d628369

File tree

5 files changed

+729
-22
lines changed

5 files changed

+729
-22
lines changed

ci/docker/wasm32-wasi/Dockerfile

+4-12
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,14 @@
1-
FROM rust:1.50.0
2-
3-
# Install wasmtime from source for now while the `experimental_x64` feature is
4-
# not yet the default. (it's not actually that experimental at the time of this
5-
# writing, wasmtime should switch defaults soon and the backend this enables has
6-
# better support for simd instructions)
7-
RUN \
8-
CARGO_INCREMENTAL=0 \
9-
CARGO_PROFILE_DEV_DEBUGINFO=0 \
10-
cargo install wasmtime-cli --features experimental_x64 --debug --vers 0.25.0 --locked
11-
121
FROM ubuntu:20.04
132

143
ENV DEBIAN_FRONTEND=noninteractive
154
RUN apt-get update -y && apt-get install -y --no-install-recommends \
165
ca-certificates \
6+
curl \
7+
xz-utils \
178
clang
189

19-
COPY --from=0 /usr/local/cargo/bin/wasmtime /usr/local/bin/wasmtime
10+
RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.26.0/wasmtime-v0.26.0-x86_64-linux.tar.xz | tar xJf -
11+
ENV PATH=$PATH:/wasmtime-v0.26.0-x86_64-linux
2012

2113
ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \
2214
--enable-simd \

crates/core_arch/src/aarch64/neon/generated.rs

+234
Original file line numberDiff line numberDiff line change
@@ -2512,6 +2512,132 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
25122512
simd_sub(a, b)
25132513
}
25142514

2515+
/// Signed Subtract Wide
2516+
#[inline]
2517+
#[target_feature(enable = "neon")]
2518+
#[cfg_attr(test, assert_instr(ssubw))]
2519+
pub unsafe fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
2520+
let c: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2521+
simd_sub(a, simd_cast(c))
2522+
}
2523+
2524+
/// Signed Subtract Wide
2525+
#[inline]
2526+
#[target_feature(enable = "neon")]
2527+
#[cfg_attr(test, assert_instr(ssubw))]
2528+
pub unsafe fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
2529+
let c: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2530+
simd_sub(a, simd_cast(c))
2531+
}
2532+
2533+
/// Signed Subtract Wide
2534+
#[inline]
2535+
#[target_feature(enable = "neon")]
2536+
#[cfg_attr(test, assert_instr(ssubw))]
2537+
pub unsafe fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
2538+
let c: int32x2_t = simd_shuffle2(b, b, [2, 3]);
2539+
simd_sub(a, simd_cast(c))
2540+
}
2541+
2542+
/// Unsigned Subtract Wide
2543+
#[inline]
2544+
#[target_feature(enable = "neon")]
2545+
#[cfg_attr(test, assert_instr(usubw))]
2546+
pub unsafe fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
2547+
let c: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2548+
simd_sub(a, simd_cast(c))
2549+
}
2550+
2551+
/// Unsigned Subtract Wide
2552+
#[inline]
2553+
#[target_feature(enable = "neon")]
2554+
#[cfg_attr(test, assert_instr(usubw))]
2555+
pub unsafe fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
2556+
let c: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2557+
simd_sub(a, simd_cast(c))
2558+
}
2559+
2560+
/// Unsigned Subtract Wide
2561+
#[inline]
2562+
#[target_feature(enable = "neon")]
2563+
#[cfg_attr(test, assert_instr(usubw))]
2564+
pub unsafe fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
2565+
let c: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
2566+
simd_sub(a, simd_cast(c))
2567+
}
2568+
2569+
/// Signed Subtract Long
2570+
#[inline]
2571+
#[target_feature(enable = "neon")]
2572+
#[cfg_attr(test, assert_instr(ssubl))]
2573+
pub unsafe fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
2574+
let c: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
2575+
let d: int16x8_t = simd_cast(c);
2576+
let e: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2577+
let f: int16x8_t = simd_cast(e);
2578+
simd_sub(d, f)
2579+
}
2580+
2581+
/// Signed Subtract Long
2582+
#[inline]
2583+
#[target_feature(enable = "neon")]
2584+
#[cfg_attr(test, assert_instr(ssubl))]
2585+
pub unsafe fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
2586+
let c: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
2587+
let d: int32x4_t = simd_cast(c);
2588+
let e: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2589+
let f: int32x4_t = simd_cast(e);
2590+
simd_sub(d, f)
2591+
}
2592+
2593+
/// Signed Subtract Long
2594+
#[inline]
2595+
#[target_feature(enable = "neon")]
2596+
#[cfg_attr(test, assert_instr(ssubl))]
2597+
pub unsafe fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
2598+
let c: int32x2_t = simd_shuffle2(a, a, [2, 3]);
2599+
let d: int64x2_t = simd_cast(c);
2600+
let e: int32x2_t = simd_shuffle2(b, b, [2, 3]);
2601+
let f: int64x2_t = simd_cast(e);
2602+
simd_sub(d, f)
2603+
}
2604+
2605+
/// Unsigned Subtract Long
2606+
#[inline]
2607+
#[target_feature(enable = "neon")]
2608+
#[cfg_attr(test, assert_instr(usubl))]
2609+
pub unsafe fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
2610+
let c: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
2611+
let d: uint16x8_t = simd_cast(c);
2612+
let e: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
2613+
let f: uint16x8_t = simd_cast(e);
2614+
simd_sub(d, f)
2615+
}
2616+
2617+
/// Unsigned Subtract Long
2618+
#[inline]
2619+
#[target_feature(enable = "neon")]
2620+
#[cfg_attr(test, assert_instr(usubl))]
2621+
pub unsafe fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
2622+
let c: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
2623+
let d: uint32x4_t = simd_cast(c);
2624+
let e: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
2625+
let f: uint32x4_t = simd_cast(e);
2626+
simd_sub(d, f)
2627+
}
2628+
2629+
/// Unsigned Subtract Long
2630+
#[inline]
2631+
#[target_feature(enable = "neon")]
2632+
#[cfg_attr(test, assert_instr(usubl))]
2633+
pub unsafe fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
2634+
let c: uint32x2_t = simd_shuffle2(a, a, [2, 3]);
2635+
let d: uint64x2_t = simd_cast(c);
2636+
let e: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
2637+
let f: uint64x2_t = simd_cast(e);
2638+
simd_sub(d, f)
2639+
}
2640+
25152641
/// Maximum (vector)
25162642
#[inline]
25172643
#[target_feature(enable = "neon")]
@@ -6585,6 +6711,114 @@ mod test {
65856711
assert_eq!(r, e);
65866712
}
65876713

6714+
#[simd_test(enable = "neon")]
6715+
unsafe fn test_vsubw_high_s8() {
6716+
let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);
6717+
let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16);
6718+
let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
6719+
let r: i16x8 = transmute(vsubw_high_s8(transmute(a), transmute(b)));
6720+
assert_eq!(r, e);
6721+
}
6722+
6723+
#[simd_test(enable = "neon")]
6724+
unsafe fn test_vsubw_high_s16() {
6725+
let a: i32x4 = i32x4::new(8, 9, 10, 11);
6726+
let b: i16x8 = i16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
6727+
let e: i32x4 = i32x4::new(0, 0, 0, 0);
6728+
let r: i32x4 = transmute(vsubw_high_s16(transmute(a), transmute(b)));
6729+
assert_eq!(r, e);
6730+
}
6731+
6732+
#[simd_test(enable = "neon")]
6733+
unsafe fn test_vsubw_high_s32() {
6734+
let a: i64x2 = i64x2::new(8, 9);
6735+
let b: i32x4 = i32x4::new(6, 7, 8, 9);
6736+
let e: i64x2 = i64x2::new(0, 0);
6737+
let r: i64x2 = transmute(vsubw_high_s32(transmute(a), transmute(b)));
6738+
assert_eq!(r, e);
6739+
}
6740+
6741+
#[simd_test(enable = "neon")]
6742+
unsafe fn test_vsubw_high_u8() {
6743+
let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
6744+
let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6745+
let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
6746+
let r: u16x8 = transmute(vsubw_high_u8(transmute(a), transmute(b)));
6747+
assert_eq!(r, e);
6748+
}
6749+
6750+
#[simd_test(enable = "neon")]
6751+
unsafe fn test_vsubw_high_u16() {
6752+
let a: u32x4 = u32x4::new(8, 9, 10, 11);
6753+
let b: u16x8 = u16x8::new(0, 1, 2, 3, 8, 9, 10, 11);
6754+
let e: u32x4 = u32x4::new(0, 0, 0, 0);
6755+
let r: u32x4 = transmute(vsubw_high_u16(transmute(a), transmute(b)));
6756+
assert_eq!(r, e);
6757+
}
6758+
6759+
#[simd_test(enable = "neon")]
6760+
unsafe fn test_vsubw_high_u32() {
6761+
let a: u64x2 = u64x2::new(8, 9);
6762+
let b: u32x4 = u32x4::new(6, 7, 8, 9);
6763+
let e: u64x2 = u64x2::new(0, 0);
6764+
let r: u64x2 = transmute(vsubw_high_u32(transmute(a), transmute(b)));
6765+
assert_eq!(r, e);
6766+
}
6767+
6768+
#[simd_test(enable = "neon")]
6769+
unsafe fn test_vsubl_high_s8() {
6770+
let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6771+
let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
6772+
let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
6773+
let r: i16x8 = transmute(vsubl_high_s8(transmute(a), transmute(b)));
6774+
assert_eq!(r, e);
6775+
}
6776+
6777+
#[simd_test(enable = "neon")]
6778+
unsafe fn test_vsubl_high_s16() {
6779+
let a: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
6780+
let b: i16x8 = i16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
6781+
let e: i32x4 = i32x4::new(4, 5, 6, 7);
6782+
let r: i32x4 = transmute(vsubl_high_s16(transmute(a), transmute(b)));
6783+
assert_eq!(r, e);
6784+
}
6785+
6786+
#[simd_test(enable = "neon")]
6787+
unsafe fn test_vsubl_high_s32() {
6788+
let a: i32x4 = i32x4::new(12, 13, 14, 15);
6789+
let b: i32x4 = i32x4::new(6, 6, 8, 8);
6790+
let e: i64x2 = i64x2::new(6, 7);
6791+
let r: i64x2 = transmute(vsubl_high_s32(transmute(a), transmute(b)));
6792+
assert_eq!(r, e);
6793+
}
6794+
6795+
#[simd_test(enable = "neon")]
6796+
unsafe fn test_vsubl_high_u8() {
6797+
let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
6798+
let b: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
6799+
let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
6800+
let r: u16x8 = transmute(vsubl_high_u8(transmute(a), transmute(b)));
6801+
assert_eq!(r, e);
6802+
}
6803+
6804+
#[simd_test(enable = "neon")]
6805+
unsafe fn test_vsubl_high_u16() {
6806+
let a: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
6807+
let b: u16x8 = u16x8::new(6, 6, 6, 6, 8, 8, 8, 8);
6808+
let e: u32x4 = u32x4::new(4, 5, 6, 7);
6809+
let r: u32x4 = transmute(vsubl_high_u16(transmute(a), transmute(b)));
6810+
assert_eq!(r, e);
6811+
}
6812+
6813+
#[simd_test(enable = "neon")]
6814+
unsafe fn test_vsubl_high_u32() {
6815+
let a: u32x4 = u32x4::new(12, 13, 14, 15);
6816+
let b: u32x4 = u32x4::new(6, 6, 8, 8);
6817+
let e: u64x2 = u64x2::new(6, 7);
6818+
let r: u64x2 = transmute(vsubl_high_u32(transmute(a), transmute(b)));
6819+
assert_eq!(r, e);
6820+
}
6821+
65886822
#[simd_test(enable = "neon")]
65896823
unsafe fn test_vmax_f64() {
65906824
let a: f64 = 1.0;

0 commit comments

Comments
 (0)