Skip to content

Commit c0c1120

Browse files
jacobbramley authored and Amanieu committed
Add more AArch64 vrnd intrinsics.
LLVM can't select float64x1_t variants, but float64x2_t variants work.
1 parent 3079fc7 commit c0c1120

File tree

3 files changed

+102
-9
lines changed

3 files changed

+102
-9
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 92 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15297,6 +15297,21 @@ pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
1529715297
vrnd32xq_f32_(a)
1529815298
}
1529915299

15300+
/// Floating-point round to 32-bit integer, using current rounding mode
15301+
///
15302+
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)
15303+
#[inline]
15304+
#[target_feature(enable = "neon,frintts")]
15305+
#[cfg_attr(test, assert_instr(frint32x))]
15306+
pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
15307+
#[allow(improper_ctypes)]
15308+
extern "unadjusted" {
15309+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32x.v2f64")]
15310+
fn vrnd32xq_f64_(a: float64x2_t) -> float64x2_t;
15311+
}
15312+
vrnd32xq_f64_(a)
15313+
}
15314+
1530015315
/// Floating-point round to 32-bit integer toward zero
1530115316
///
1530215317
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)
@@ -15327,6 +15342,21 @@ pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
1532715342
vrnd32zq_f32_(a)
1532815343
}
1532915344

15345+
/// Floating-point round to 32-bit integer toward zero
15346+
///
15347+
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)
15348+
#[inline]
15349+
#[target_feature(enable = "neon,frintts")]
15350+
#[cfg_attr(test, assert_instr(frint32z))]
15351+
pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
15352+
#[allow(improper_ctypes)]
15353+
extern "unadjusted" {
15354+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32z.v2f64")]
15355+
fn vrnd32zq_f64_(a: float64x2_t) -> float64x2_t;
15356+
}
15357+
vrnd32zq_f64_(a)
15358+
}
15359+
1533015360
/// Floating-point round to 64-bit integer, using current rounding mode
1533115361
///
1533215362
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)
@@ -15357,6 +15387,21 @@ pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
1535715387
vrnd64xq_f32_(a)
1535815388
}
1535915389

15390+
/// Floating-point round to 64-bit integer, using current rounding mode
15391+
///
15392+
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)
15393+
#[inline]
15394+
#[target_feature(enable = "neon,frintts")]
15395+
#[cfg_attr(test, assert_instr(frint64x))]
15396+
pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
15397+
#[allow(improper_ctypes)]
15398+
extern "unadjusted" {
15399+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64x.v2f64")]
15400+
fn vrnd64xq_f64_(a: float64x2_t) -> float64x2_t;
15401+
}
15402+
vrnd64xq_f64_(a)
15403+
}
15404+
1536015405
/// Floating-point round to 64-bit integer toward zero
1536115406
///
1536215407
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)
@@ -15387,6 +15432,21 @@ pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
1538715432
vrnd64zq_f32_(a)
1538815433
}
1538915434

15435+
/// Floating-point round to 64-bit integer toward zero
15436+
///
15437+
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)
15438+
#[inline]
15439+
#[target_feature(enable = "neon,frintts")]
15440+
#[cfg_attr(test, assert_instr(frint64z))]
15441+
pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
15442+
#[allow(improper_ctypes)]
15443+
extern "unadjusted" {
15444+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64z.v2f64")]
15445+
fn vrnd64zq_f64_(a: float64x2_t) -> float64x2_t;
15446+
}
15447+
vrnd64zq_f64_(a)
15448+
}
15449+
1539015450
/// Transpose vectors
1539115451
///
1539215452
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)
@@ -26810,6 +26870,14 @@ mod test {
2681026870
assert_eq!(r, e);
2681126871
}
2681226872

26873+
#[simd_test(enable = "neon,frintts")]
26874+
unsafe fn test_vrnd32xq_f64() {
26875+
let a: f64x2 = f64x2::new(1.1, 1.9);
26876+
let e: f64x2 = f64x2::new(1.0, 2.0);
26877+
let r: f64x2 = transmute(vrnd32xq_f64(transmute(a)));
26878+
assert_eq!(r, e);
26879+
}
26880+
2681326881
#[simd_test(enable = "neon,frintts")]
2681426882
unsafe fn test_vrnd32z_f32() {
2681526883
let a: f32x2 = f32x2::new(1.1, 1.9);
@@ -26826,6 +26894,14 @@ mod test {
2682626894
assert_eq!(r, e);
2682726895
}
2682826896

26897+
#[simd_test(enable = "neon,frintts")]
26898+
unsafe fn test_vrnd32zq_f64() {
26899+
let a: f64x2 = f64x2::new(1.1, 1.9);
26900+
let e: f64x2 = f64x2::new(1.0, 1.0);
26901+
let r: f64x2 = transmute(vrnd32zq_f64(transmute(a)));
26902+
assert_eq!(r, e);
26903+
}
26904+
2682926905
#[simd_test(enable = "neon,frintts")]
2683026906
unsafe fn test_vrnd64x_f32() {
2683126907
let a: f32x2 = f32x2::new(1.1, 1.9);
@@ -26842,6 +26918,14 @@ mod test {
2684226918
assert_eq!(r, e);
2684326919
}
2684426920

26921+
#[simd_test(enable = "neon,frintts")]
26922+
unsafe fn test_vrnd64xq_f64() {
26923+
let a: f64x2 = f64x2::new(1.1, 1.9);
26924+
let e: f64x2 = f64x2::new(1.0, 2.0);
26925+
let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
26926+
assert_eq!(r, e);
26927+
}
26928+
2684526929
#[simd_test(enable = "neon,frintts")]
2684626930
unsafe fn test_vrnd64z_f32() {
2684726931
let a: f32x2 = f32x2::new(1.1, 1.9);
@@ -26858,6 +26942,14 @@ mod test {
2685826942
assert_eq!(r, e);
2685926943
}
2686026944

26945+
#[simd_test(enable = "neon,frintts")]
26946+
unsafe fn test_vrnd64zq_f64() {
26947+
let a: f64x2 = f64x2::new(1.1, 1.9);
26948+
let e: f64x2 = f64x2::new(1.0, 1.0);
26949+
let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
26950+
assert_eq!(r, e);
26951+
}
26952+
2686126953
#[simd_test(enable = "neon")]
2686226954
unsafe fn test_vtrn1_s8() {
2686326955
let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);

crates/intrinsic-test/missing_aarch64.txt

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,15 +14,16 @@ vbfmlaltq_laneq_f32
1414
vbfmmlaq_f32
1515

1616

17-
# Missing from both Clang and stdarch
18-
vrnd32x_f64
17+
# Implemented in stdarch, but missing in Clang.
1918
vrnd32xq_f64
20-
vrnd32z_f64
2119
vrnd32zq_f64
22-
vrnd64x_f64
2320
vrnd64xq_f64
24-
vrnd64z_f64
2521
vrnd64zq_f64
22+
# LLVM select error, and missing in Clang.
23+
vrnd32x_f64
24+
vrnd32z_f64
25+
vrnd64x_f64
26+
vrnd64z_f64
2627

2728
# LLVM select error in debug builds
2829
#vqshlu_n_s16

crates/stdarch-gen/neon.spec

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7218,7 +7218,7 @@ target = frintts
72187218

72197219
aarch64 = frint32x
72207220
link-aarch64 = frint32x._EXT_
7221-
generate float32x2_t, float32x4_t
7221+
generate float32x2_t, float32x4_t, float64x2_t
72227222

72237223
/// Floating-point round to 32-bit integer toward zero
72247224
name = vrnd32z
@@ -7228,7 +7228,7 @@ target = frintts
72287228

72297229
aarch64 = frint32z
72307230
link-aarch64 = frint32z._EXT_
7231-
generate float32x2_t, float32x4_t
7231+
generate float32x2_t, float32x4_t, float64x2_t
72327232

72337233
/// Floating-point round to 64-bit integer, using current rounding mode
72347234
name = vrnd64x
@@ -7238,7 +7238,7 @@ target = frintts
72387238

72397239
aarch64 = frint64x
72407240
link-aarch64 = frint64x._EXT_
7241-
generate float32x2_t, float32x4_t
7241+
generate float32x2_t, float32x4_t, float64x2_t
72427242

72437243
/// Floating-point round to 64-bit integer toward zero
72447244
name = vrnd64z
@@ -7248,7 +7248,7 @@ target = frintts
72487248

72497249
aarch64 = frint64z
72507250
link-aarch64 = frint64z._EXT_
7251-
generate float32x2_t, float32x4_t
7251+
generate float32x2_t, float32x4_t, float64x2_t
72527252

72537253
/// Transpose elements
72547254
name = vtrn

0 commit comments

Comments
 (0)