Skip to content

Commit cf8fb43

Browse files
[AArch64] Implement NEON vamin/vamax intrinsics (#99041)
This patch implements the intrinsics of the form floatNxM_t vamin[q]_fN(floatNxM_t vn, floatNxM_t vm); floatNxM_t vamax[q]_fN(floatNxM_t vn, floatNxM_t vm); as defined in ARM-software/acle#324 --------- Co-authored-by: Hassnaa Hamdi <[email protected]>
1 parent 32cef07 commit cf8fb43

File tree

8 files changed

+302
-3
lines changed

8 files changed

+302
-3
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2120,4 +2120,9 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
21202120
def VLUTI4_BF_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb",
21212121
[ImmCheck<3, ImmCheck0_3>]>;
21222122
}
2123-
}
2123+
}
2124+
2125+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
2126+
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
2127+
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
2128+
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13573,6 +13573,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1357313573
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1357413574
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1357513575
}
13576+
13577+
case NEON::BI__builtin_neon_vamin_f16:
13578+
case NEON::BI__builtin_neon_vaminq_f16:
13579+
case NEON::BI__builtin_neon_vamin_f32:
13580+
case NEON::BI__builtin_neon_vaminq_f32:
13581+
case NEON::BI__builtin_neon_vaminq_f64: {
13582+
Int = Intrinsic::aarch64_neon_famin;
13583+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
13584+
}
13585+
case NEON::BI__builtin_neon_vamax_f16:
13586+
case NEON::BI__builtin_neon_vamaxq_f16:
13587+
case NEON::BI__builtin_neon_vamax_f32:
13588+
case NEON::BI__builtin_neon_vamaxq_f32:
13589+
case NEON::BI__builtin_neon_vamaxq_f64: {
13590+
Int = Intrinsic::aarch64_neon_famax;
13591+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
13592+
}
1357613593
}
1357713594
}
1357813595

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s
4+
5+
// REQUIRES: aarch64-registered-target
6+
7+
#include <arm_neon.h>
8+
9+
// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16(
10+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
11+
// CHECK-NEXT: [[ENTRY:.*:]]
12+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
13+
// CHECK-NEXT: ret <4 x half> [[FAMIN2_I]]
14+
//
15+
float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) {
16+
return vamin_f16(vn, vm);
17+
}
18+
19+
// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16(
20+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
21+
// CHECK-NEXT: [[ENTRY:.*:]]
22+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
23+
// CHECK-NEXT: ret <8 x half> [[FAMIN2_I]]
24+
//
25+
float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) {
26+
return vaminq_f16(vn, vm);
27+
}
28+
29+
// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32(
30+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
31+
// CHECK-NEXT: [[ENTRY:.*:]]
32+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
33+
// CHECK-NEXT: ret <2 x float> [[FAMIN2_I]]
34+
//
35+
float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) {
36+
return vamin_f32(vn, vm);
37+
}
38+
39+
// CHECK-LABEL: define dso_local <4 x float> @test_vaminq_f32(
40+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
41+
// CHECK-NEXT: [[ENTRY:.*:]]
42+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
43+
// CHECK-NEXT: ret <4 x float> [[FAMIN2_I]]
44+
//
45+
float32x4_t test_vaminq_f32(float32x4_t vn, float32x4_t vm) {
46+
return vaminq_f32(vn, vm);
47+
}
48+
49+
// CHECK-LABEL: define dso_local <2 x double> @test_vaminq_f64(
50+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
51+
// CHECK-NEXT: [[ENTRY:.*:]]
52+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
53+
// CHECK-NEXT: ret <2 x double> [[FAMIN2_I]]
54+
//
55+
float64x2_t test_vaminq_f64(float64x2_t vn, float64x2_t vm) {
56+
return vaminq_f64(vn, vm);
57+
}
58+
59+
// CHECK-LABEL: define dso_local <4 x half> @test_vamax_f16(
60+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
61+
// CHECK-NEXT: [[ENTRY:.*:]]
62+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
63+
// CHECK-NEXT: ret <4 x half> [[FAMAX2_I]]
64+
//
65+
float16x4_t test_vamax_f16(float16x4_t vn, float16x4_t vm) {
66+
return vamax_f16(vn, vm);
67+
}
68+
69+
// CHECK-LABEL: define dso_local <8 x half> @test_vamaxq_f16(
70+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
71+
// CHECK-NEXT: [[ENTRY:.*:]]
72+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
73+
// CHECK-NEXT: ret <8 x half> [[FAMAX2_I]]
74+
//
75+
float16x8_t test_vamaxq_f16(float16x8_t vn, float16x8_t vm) {
76+
return vamaxq_f16(vn, vm);
77+
}
78+
79+
// CHECK-LABEL: define dso_local <2 x float> @test_vamax_f32(
80+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
81+
// CHECK-NEXT: [[ENTRY:.*:]]
82+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
83+
// CHECK-NEXT: ret <2 x float> [[FAMAX2_I]]
84+
//
85+
float32x2_t test_vamax_f32(float32x2_t vn, float32x2_t vm) {
86+
return vamax_f32(vn, vm);
87+
}
88+
89+
// CHECK-LABEL: define dso_local <4 x float> @test_vamaxq_f32(
90+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
91+
// CHECK-NEXT: [[ENTRY:.*:]]
92+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
93+
// CHECK-NEXT: ret <4 x float> [[FAMAX2_I]]
94+
//
95+
float32x4_t test_vamaxq_f32(float32x4_t vn, float32x4_t vm) {
96+
return vamaxq_f32(vn, vm);
97+
}
98+
99+
// CHECK-LABEL: define dso_local <2 x double> @test_vamaxq_f64(
100+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
101+
// CHECK-NEXT: [[ENTRY:.*:]]
102+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
103+
// CHECK-NEXT: ret <2 x double> [[FAMAX2_I]]
104+
//
105+
float64x2_t test_vamaxq_f64(float64x2_t vn, float64x2_t vm) {
106+
return vamaxq_f64(vn, vm);
107+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -emit-llvm -verify %s -o /dev/null
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
#include <arm_neon.h>
6+
7+
float16x4_t a16x4, b16x4;
8+
float16x8_t a16x8, b16x8;
9+
float32x2_t a32x2, b32x2;
10+
float32x4_t a32x4, b32x4;
11+
float64x2_t a64x2, b64x2;
12+
13+
14+
void test() {
15+
(void) vamin_f16 (a16x4, b16x4);
16+
// expected-error@-1 {{always_inline function 'vamin_f16' requires target feature 'faminmax'}}
17+
(void) vaminq_f16(a16x8, b16x8);
18+
// expected-error@-1 {{always_inline function 'vaminq_f16' requires target feature 'faminmax'}}
19+
(void) vamin_f32 (a32x2, b32x2);
20+
// expected-error@-1 {{always_inline function 'vamin_f32' requires target feature 'faminmax'}}
21+
(void) vaminq_f32(a32x4, b32x4);
22+
// expected-error@-1 {{always_inline function 'vaminq_f32' requires target feature 'faminmax'}}
23+
(void) vaminq_f64(a64x2, b64x2);
24+
// expected-error@-1 {{always_inline function 'vaminq_f64' requires target feature 'faminmax'}}
25+
(void) vamax_f16 (a16x4, b16x4);
26+
// expected-error@-1 {{always_inline function 'vamax_f16' requires target feature 'faminmax'}}
27+
(void) vamaxq_f16(a16x8, b16x8);
28+
// expected-error@-1 {{always_inline function 'vamaxq_f16' requires target feature 'faminmax'}}
29+
(void) vamax_f32 (a32x2, b32x2);
30+
// expected-error@-1 {{always_inline function 'vamax_f32' requires target feature 'faminmax'}}
31+
(void) vamaxq_f32(a32x4, b32x4);
32+
// expected-error@-1 {{always_inline function 'vamaxq_f32' requires target feature 'faminmax'}}
33+
(void) vamaxq_f64(a64x2, b64x2);
34+
// expected-error@-1 {{always_inline function 'vamaxq_f64' requires target feature 'faminmax'}}
35+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +faminmax -emit-llvm -verify %s -o /dev/null
2+
3+
// REQUIRES: aarch64-registered-target
4+
5+
#include <arm_neon.h>
6+
7+
float16x4_t a16x4, b16x4;
8+
float16x8_t a16x8, b16x8;
9+
float32x2_t a32x2, b32x2;
10+
float32x4_t a32x4, b32x4;
11+
float64x2_t a64x2, b64x2;
12+
13+
void test () {
14+
(void) vamin_f16 (a16x4, b16x4);
15+
// expected-error@-1 {{always_inline function 'vamin_f16' requires target feature 'neon'}}
16+
(void) vaminq_f16(a16x8, b16x8);
17+
// expected-error@-1 {{always_inline function 'vaminq_f16' requires target feature 'neon'}}
18+
(void) vamin_f32 (a32x2, b32x2);
19+
// expected-error@-1 {{always_inline function 'vamin_f32' requires target feature 'neon'}}
20+
(void) vaminq_f32(a32x4, b32x4);
21+
// expected-error@-1 {{always_inline function 'vaminq_f32' requires target feature 'neon'}}
22+
(void) vaminq_f64(a64x2, b64x2);
23+
// expected-error@-1 {{always_inline function 'vaminq_f64' requires target feature 'neon'}}
24+
(void) vamax_f16 (a16x4, b16x4);
25+
// expected-error@-1 {{always_inline function 'vamax_f16' requires target feature 'neon'}}
26+
(void) vamaxq_f16(a16x8, b16x8);
27+
// expected-error@-1 {{always_inline function 'vamaxq_f16' requires target feature 'neon'}}
28+
(void) vamax_f32 (a32x2, b32x2);
29+
// expected-error@-1 {{always_inline function 'vamax_f32' requires target feature 'neon'}}
30+
(void) vamaxq_f32(a32x4, b32x4);
31+
// expected-error@-1 {{always_inline function 'vamaxq_f32' requires target feature 'neon'}}
32+
(void) vamaxq_f64(a64x2, b64x2);
33+
// expected-error@-1 {{always_inline function 'vamaxq_f64' requires target feature 'neon'}}
34+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3801,3 +3801,6 @@ def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
38013801

38023802
def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic;
38033803
def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
3804+
// Neon absolute maximum and minimum
3805+
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
3806+
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10134,13 +10134,15 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
1013410134
// fminimum(abs(a), abs(b)) -> famin(a, b)
1013510135
// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
1013610136
def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
10137-
[(fminimum (fabs node:$Rn), (fabs node:$Rm)),
10137+
[(int_aarch64_neon_famin node:$Rn, node:$Rm),
10138+
(fminimum (fabs node:$Rn), (fabs node:$Rm)),
1013810139
(fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
1013910140

1014010141
// fmaximum(abs(a), abs(b)) -> famax(a, b)
1014110142
// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
1014210143
def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
10143-
[(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
10144+
[(int_aarch64_neon_famax node:$Rn, node:$Rm),
10145+
(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
1014410146
(fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
1014510147

1014610148
let Predicates = [HasNEON, HasFAMINMAX] in {
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
define <4 x half> @test_famin_f16(<4 x half> %vn, <4 x half> %vm) #0 {
7+
; CHECK-LABEL: test_famin_f16:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
10+
; CHECK-NEXT: ret
11+
%res = call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> %vn, <4 x half> %vm)
12+
ret <4 x half> %res
13+
}
14+
15+
define <8 x half> @test_famin2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
16+
; CHECK-LABEL: test_famin2_f16:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
19+
; CHECK-NEXT: ret
20+
%res = call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> %vn, <8 x half> %vm)
21+
ret <8 x half> %res
22+
}
23+
24+
define <2 x float> @test_famin_f32(<2 x float> %vn, <2 x float> %vm) #0 {
25+
; CHECK-LABEL: test_famin_f32:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
28+
; CHECK-NEXT: ret
29+
%res = call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> %vn, <2 x float> %vm)
30+
ret <2 x float> %res
31+
}
32+
33+
define <4 x float> @test_famin2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
34+
; CHECK-LABEL: test_famin2_f32:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
37+
; CHECK-NEXT: ret
38+
%res = call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> %vn, <4 x float> %vm)
39+
ret <4 x float> %res
40+
}
41+
42+
define <2 x double> @test_famin_f64(<2 x double> %vn, <2 x double> %vm) #0 {
43+
; CHECK-LABEL: test_famin_f64:
44+
; CHECK: // %bb.0:
45+
; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
46+
; CHECK-NEXT: ret
47+
%res = call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> %vn, <2 x double> %vm)
48+
ret <2 x double> %res
49+
}
50+
51+
define <4 x half> @test_famax_f16(<4 x half> %vn, <4 x half> %vm) #0 {
52+
; CHECK-LABEL: test_famax_f16:
53+
; CHECK: // %bb.0:
54+
; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
55+
; CHECK-NEXT: ret
56+
%res = call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> %vn, <4 x half> %vm)
57+
ret <4 x half> %res
58+
}
59+
60+
define <8 x half> @test_famax2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
61+
; CHECK-LABEL: test_famax2_f16:
62+
; CHECK: // %bb.0:
63+
; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
64+
; CHECK-NEXT: ret
65+
%res = call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> %vn, <8 x half> %vm)
66+
ret <8 x half> %res
67+
}
68+
69+
define <2 x float> @test_famax_f32(<2 x float> %vn, <2 x float> %vm) #0 {
70+
; CHECK-LABEL: test_famax_f32:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
73+
; CHECK-NEXT: ret
74+
%res = call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> %vn, <2 x float> %vm)
75+
ret <2 x float> %res
76+
}
77+
78+
define <4 x float> @test_famax2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
79+
; CHECK-LABEL: test_famax2_f32:
80+
; CHECK: // %bb.0:
81+
; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
82+
; CHECK-NEXT: ret
83+
%res = call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> %vn, <4 x float> %vm)
84+
ret <4 x float> %res
85+
}
86+
87+
define <2 x double> @test_famax_f64(<2 x double> %vn, <2 x double> %vm) #0 {
88+
; CHECK-LABEL: test_famax_f64:
89+
; CHECK: // %bb.0:
90+
; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
91+
; CHECK-NEXT: ret
92+
%res = call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> %vn, <2 x double> %vm)
93+
ret <2 x double> %res
94+
}
95+
96+
attributes #0 = { "target-features"="+neon,+faminmax" }

0 commit comments

Comments
 (0)