@@ -1564,7 +1564,7 @@ pub unsafe fn vclezd_f64(a: f64) -> u64 {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
     let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1574,7 +1574,7 @@ pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
     let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
@@ -1584,7 +1584,7 @@ pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
     let b: i16x4 = i16x4::new(0, 0, 0, 0);
@@ -1594,7 +1594,7 @@ pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
     let b: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1604,7 +1604,7 @@ pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
     let b: i32x2 = i32x2::new(0, 0);
@@ -1614,7 +1614,7 @@ pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
     let b: i32x4 = i32x4::new(0, 0, 0, 0);
@@ -1624,7 +1624,7 @@ pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
     let b: i64x1 = i64x1::new(0);
@@ -1634,7 +1634,7 @@ pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
 /// Compare signed less than zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshr))]
+#[cfg_attr(test, assert_instr(cmlt))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
     let b: i64x2 = i64x2::new(0, 0);
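For context on the assertion change in these eight hunks: vcltz compares each signed lane against zero and produces an all-ones mask for every negative lane, which corresponds to a single cmlt instruction rather than the previously expected sshr. A minimal scalar sketch of that per-lane semantics (hypothetical helper, not stdarch code):

// Hypothetical scalar model (not part of the patch): each signed lane maps
// to an all-ones mask when it is negative, and to zero otherwise, which is
// what a single CMLT-against-zero produces.
fn cltz_lanes(a: [i8; 8]) -> [u8; 8] {
    a.map(|x| if x < 0 { u8::MAX } else { 0 })
}

fn main() {
    assert_eq!(
        cltz_lanes([-1, 0, 1, i8::MIN, i8::MAX, -2, 2, 0]),
        [0xFF, 0, 0, 0xFF, 0, 0xFF, 0, 0]
    );
}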
@@ -2714,7 +2714,7 @@ pub unsafe fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(a: poly16x8_t,
 /// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t, b: int64x1_t) -> int64x2_t {
@@ -2731,7 +2731,7 @@ pub unsafe fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(a: int64x2_t,
 /// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t, b: uint64x1_t) -> uint64x2_t {
@@ -2748,7 +2748,7 @@ pub unsafe fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(a: uint64x2_t,
 /// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(a: poly64x2_t, b: poly64x1_t) -> poly64x2_t {
@@ -2784,7 +2784,7 @@ pub unsafe fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(a: float32x4_t
 /// Insert vector element from another vector element
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(zip1, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
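For these vcopyq_lane hunks, the expected instruction becomes a plain mov of one lane. The operation itself is simple to state: lane LANE2 of b replaces lane LANE1 of a, all other lanes unchanged. A scalar sketch of that rule (hypothetical helper, not the stdarch implementation):

// Hypothetical scalar model of vcopyq_lane_s64::<LANE1, LANE2>(a, b):
// copy lane LANE2 of b into lane LANE1 of a, leaving other lanes intact.
fn copyq_lane_s64<const LANE1: usize, const LANE2: usize>(
    a: [i64; 2],
    b: [i64; 1],
) -> [i64; 2] {
    let mut out = a;
    out[LANE1] = b[LANE2];
    out
}

fn main() {
    // Matches the LANE1 = 1, LANE2 = 0 case the assert_instr lines test.
    assert_eq!(copyq_lane_s64::<1, 0>([10, 20], [99]), [10, 99]);
}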
@@ -9183,7 +9183,7 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     vmaxq_f64_(a, b)
 }
 
-/// Floating-point Maximun Number (vector)
+/// Floating-point Maximum Number (vector)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnm))]
@@ -9197,7 +9197,7 @@ pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     vmaxnm_f64_(a, b)
 }
 
-/// Floating-point Maximun Number (vector)
+/// Floating-point Maximum Number (vector)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fmaxnm))]
@@ -9379,7 +9379,7 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     vminq_f64_(a, b)
 }
 
-/// Floating-point Minimun Number (vector)
+/// Floating-point Minimum Number (vector)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fminnm))]
@@ -9393,7 +9393,7 @@ pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     vminnm_f64_(a, b)
 }
 
-/// Floating-point Minimun Number (vector)
+/// Floating-point Minimum Number (vector)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fminnm))]
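These four hunks only fix the Maximun/Minimun spelling, but the docs are worth a gloss: fmaxnm and fminnm implement the IEEE 754 maxNum/minNum rule, where a quiet NaN in one operand yields the other, numeric, operand. A scalar sketch of that rule (hypothetical helper, NaN-vs-NaN and signed-zero subtleties omitted):

// Hypothetical scalar model of the maxNum semantics behind fmaxnm: if one
// operand is NaN, the numeric operand wins; only NaN-vs-NaN stays NaN.
fn max_num(a: f64, b: f64) -> f64 {
    if a.is_nan() {
        b
    } else if b.is_nan() {
        a
    } else if a > b {
        a
    } else {
        b
    }
}

fn main() {
    assert_eq!(max_num(f64::NAN, 1.0), 1.0);
    assert_eq!(max_num(2.0, f64::NAN), 2.0);
    assert_eq!(max_num(2.0, 3.0), 3.0);
}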
@@ -10535,31 +10535,51 @@ pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32
 #[target_feature(enable = "rdm")]
 #[cfg_attr(test, assert_instr(sqrdmlah))]
 pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    vqadd_s16(a, vqrdmulh_s16(b, c))
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i16")]
+        fn vqrdmlah_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+    }
+    vqrdmlah_s16_(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "rdm")]
 #[cfg_attr(test, assert_instr(sqrdmlah))]
 pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    vqaddq_s16(a, vqrdmulhq_s16(b, c))
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v8i16")]
+        fn vqrdmlahq_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    vqrdmlahq_s16_(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "rdm")]
 #[cfg_attr(test, assert_instr(sqrdmlah))]
 pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    vqadd_s32(a, vqrdmulh_s32(b, c))
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v2i32")]
+        fn vqrdmlah_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+    }
+    vqrdmlah_s32_(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
 #[inline]
 #[target_feature(enable = "rdm")]
 #[cfg_attr(test, assert_instr(sqrdmlah))]
 pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    vqaddq_s32(a, vqrdmulhq_s32(b, c))
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmlah.v4i32")]
+        fn vqrdmlahq_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    vqrdmlahq_s32_(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
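This hunk replaces the old vqadd-of-vqrdmulh polyfill with a direct call to the llvm.aarch64.neon.sqrdmlah intrinsic. The distinction is observable, not just codegen polish: SQRDMLAH adds the accumulator into the doubled product before the single rounding shift and saturation, while the polyfill rounded and saturated the product first and could lose a step to intermediate saturation. A scalar sketch of one 16-bit lane, based on my reading of the Arm pseudocode (the helper names are hypothetical):

// Hypothetical model of one 16-bit SQRDMLAH lane: (a << 16) + 2*b*c,
// plus the rounding constant, shifted down once and saturated once.
fn sqrdmlah_lane_s16(a: i16, b: i16, c: i16) -> i16 {
    let wide = ((a as i64) << 16) + 2 * (b as i64) * (c as i64) + (1 << 15);
    (wide >> 16).clamp(i16::MIN as i64, i16::MAX as i64) as i16
}

// The old polyfill: round and saturate the product, then saturating-add.
fn polyfill_lane_s16(a: i16, b: i16, c: i16) -> i16 {
    let product = (2 * (b as i64) * (c as i64) + (1 << 15)) >> 16;
    let rdmulh = product.clamp(i16::MIN as i64, i16::MAX as i64) as i16;
    a.saturating_add(rdmulh)
}

fn main() {
    // The two disagree when the intermediate product saturates:
    let (a, b, c) = (-1i16, i16::MIN, i16::MIN);
    assert_eq!(sqrdmlah_lane_s16(a, b, c), 32767);
    assert_eq!(polyfill_lane_s16(a, b, c), 32766);
}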
@@ -10591,7 +10611,8 @@ pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_imm2!(LANE);
-    vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
+    let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlah_s16(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10601,7 +10622,8 @@ pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c:
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_imm3!(LANE);
-    vqadd_s16(a, vqrdmulh_laneq_s16::<LANE>(b, c))
+    let c: int16x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlah_s16(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10611,7 +10633,8 @@ pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c:
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_imm2!(LANE);
-    vqaddq_s16(a, vqrdmulhq_lane_s16::<LANE>(b, c))
+    let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlahq_s16(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10621,7 +10644,8 @@ pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c:
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_imm3!(LANE);
-    vqaddq_s16(a, vqrdmulhq_laneq_s16::<LANE>(b, c))
+    let c: int16x8_t = simd_shuffle8!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlahq_s16(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10631,7 +10655,8 @@ pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_imm1!(LANE);
-    vqadd_s32(a, vqrdmulh_lane_s32::<LANE>(b, c))
+    let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
+    vqrdmlah_s32(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10641,7 +10666,8 @@ pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c:
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_imm2!(LANE);
-    vqadd_s32(a, vqrdmulh_laneq_s32::<LANE>(b, c))
+    let c: int32x2_t = simd_shuffle2!(c, c, <const LANE: i32> [LANE as u32, LANE as u32]);
+    vqrdmlah_s32(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10651,7 +10677,8 @@ pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c:
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_imm1!(LANE);
-    vqaddq_s32(a, vqrdmulhq_lane_s32::<LANE>(b, c))
+    let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlahq_s32(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
@@ -10661,7 +10688,8 @@ pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c:
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_imm2!(LANE);
-    vqaddq_s32(a, vqrdmulhq_laneq_s32::<LANE>(b, c))
+    let c: int32x4_t = simd_shuffle4!(c, c, <const LANE: i32> [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmlahq_s32(a, b, c)
 }
 
 /// Signed saturating rounding doubling multiply accumulate returning high half
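Each _lane variant above now splats lane LANE of c with a simd_shuffle and delegates to the corresponding vector-by-vector intrinsic, so the fused SQRDMLAH semantics apply to the lane forms as well. The splat step in isolation (hypothetical helper, not stdarch code):

// Hypothetical model of the simd_shuffle4! splat used above: broadcast
// lane LANE of c across all four lanes before the multiply-accumulate.
fn splat_lane_s16<const LANE: usize>(c: [i16; 4]) -> [i16; 4] {
    [c[LANE]; 4]
}

fn main() {
    assert_eq!(splat_lane_s16::<2>([5, 6, 7, 8]), [7, 7, 7, 7]);
}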