Skip to content

Commit 886c48d

Browse files
[AArch64] Implement NEON vamin/vamax intrinsics
This patch implements the intrinsics of the form floatNxM_t vamin[q]_fN(floatNxM_t vn, floatNxM_t vm); floatNxM_t vamax[q]_fN(floatNxM_t vn, floatNxM_t vm); as defined in ARM-software/acle#324 Co-authored-by: Hassnaa Hamdi <[email protected]>
1 parent c2b92a4 commit 886c48d

File tree

7 files changed

+257
-2
lines changed

7 files changed

+257
-2
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2115,3 +2115,8 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
21152115
def VLUTI4_BF_X2_Q : SInst<"vluti4_laneq_x2", ".2(<U)I", "Qb">;
21162116
}
21172117
}
2118+
2119+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "faminmax" in {
2120+
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
2121+
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
2122+
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13570,6 +13570,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1357013570
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1357113571
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1357213572
}
13573+
13574+
case NEON::BI__builtin_neon_vamin_f16:
13575+
case NEON::BI__builtin_neon_vaminq_f16:
13576+
case NEON::BI__builtin_neon_vamin_f32:
13577+
case NEON::BI__builtin_neon_vaminq_f32:
13578+
case NEON::BI__builtin_neon_vaminq_f64: {
13579+
Int = Intrinsic::aarch64_neon_famin;
13580+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin");
13581+
}
13582+
case NEON::BI__builtin_neon_vamax_f16:
13583+
case NEON::BI__builtin_neon_vamaxq_f16:
13584+
case NEON::BI__builtin_neon_vamax_f32:
13585+
case NEON::BI__builtin_neon_vamaxq_f32:
13586+
case NEON::BI__builtin_neon_vamaxq_f64: {
13587+
Int = Intrinsic::aarch64_neon_famax;
13588+
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax");
13589+
}
1357313590
}
1357413591
}
1357513592

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
#include <arm_neon.h>
3+
4+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s
6+
7+
// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16(
8+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9+
// CHECK-NEXT: [[ENTRY:.*:]]
10+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
11+
// CHECK-NEXT: ret <4 x half> [[FAMIN2_I]]
12+
//
13+
float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) {
14+
return vamin_f16(vn, vm);
15+
}
16+
17+
// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16(
18+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
19+
// CHECK-NEXT: [[ENTRY:.*:]]
20+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
21+
// CHECK-NEXT: ret <8 x half> [[FAMIN2_I]]
22+
//
23+
float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) {
24+
return vaminq_f16(vn, vm);
25+
26+
}
27+
28+
// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32(
29+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
30+
// CHECK-NEXT: [[ENTRY:.*:]]
31+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
32+
// CHECK-NEXT: ret <2 x float> [[FAMIN2_I]]
33+
//
34+
float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) {
35+
return vamin_f32(vn, vm);
36+
37+
}
38+
39+
// CHECK-LABEL: define dso_local <4 x float> @test_vaminq_f32(
40+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
41+
// CHECK-NEXT: [[ENTRY:.*:]]
42+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
43+
// CHECK-NEXT: ret <4 x float> [[FAMIN2_I]]
44+
//
45+
float32x4_t test_vaminq_f32(float32x4_t vn, float32x4_t vm) {
46+
return vaminq_f32(vn, vm);
47+
48+
}
49+
50+
// CHECK-LABEL: define dso_local <2 x double> @test_vaminq_f64(
51+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
52+
// CHECK-NEXT: [[ENTRY:.*:]]
53+
// CHECK-NEXT: [[FAMIN2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
54+
// CHECK-NEXT: ret <2 x double> [[FAMIN2_I]]
55+
//
56+
float64x2_t test_vaminq_f64(float64x2_t vn, float64x2_t vm) {
57+
return vaminq_f64(vn, vm);
58+
}
59+
60+
61+
// CHECK-LABEL: define dso_local <4 x half> @test_vamax_f16(
62+
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
63+
// CHECK-NEXT: [[ENTRY:.*:]]
64+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> [[VN]], <4 x half> [[VM]])
65+
// CHECK-NEXT: ret <4 x half> [[FAMAX2_I]]
66+
//
67+
float16x4_t test_vamax_f16(float16x4_t vn, float16x4_t vm) {
68+
return vamax_f16(vn, vm);
69+
}
70+
71+
// CHECK-LABEL: define dso_local <8 x half> @test_vamaxq_f16(
72+
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
73+
// CHECK-NEXT: [[ENTRY:.*:]]
74+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> [[VN]], <8 x half> [[VM]])
75+
// CHECK-NEXT: ret <8 x half> [[FAMAX2_I]]
76+
//
77+
float16x8_t test_vamaxq_f16(float16x8_t vn, float16x8_t vm) {
78+
return vamaxq_f16(vn, vm);
79+
80+
}
81+
82+
// CHECK-LABEL: define dso_local <2 x float> @test_vamax_f32(
83+
// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
84+
// CHECK-NEXT: [[ENTRY:.*:]]
85+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> [[VN]], <2 x float> [[VM]])
86+
// CHECK-NEXT: ret <2 x float> [[FAMAX2_I]]
87+
//
88+
float32x2_t test_vamax_f32(float32x2_t vn, float32x2_t vm) {
89+
return vamax_f32(vn, vm);
90+
91+
}
92+
93+
// CHECK-LABEL: define dso_local <4 x float> @test_vamaxq_f32(
94+
// CHECK-SAME: <4 x float> noundef [[VN:%.*]], <4 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
95+
// CHECK-NEXT: [[ENTRY:.*:]]
96+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> [[VN]], <4 x float> [[VM]])
97+
// CHECK-NEXT: ret <4 x float> [[FAMAX2_I]]
98+
//
99+
float32x4_t test_vamaxq_f32(float32x4_t vn, float32x4_t vm) {
100+
return vamaxq_f32(vn, vm);
101+
102+
}
103+
104+
// CHECK-LABEL: define dso_local <2 x double> @test_vamaxq_f64(
105+
// CHECK-SAME: <2 x double> noundef [[VN:%.*]], <2 x double> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
106+
// CHECK-NEXT: [[ENTRY:.*:]]
107+
// CHECK-NEXT: [[FAMAX2_I:%.*]] = tail call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> [[VN]], <2 x double> [[VM]])
108+
// CHECK-NEXT: ret <2 x double> [[FAMAX2_I]]
109+
//
110+
float64x2_t test_vamaxq_f64(float64x2_t vn, float64x2_t vm) {
111+
return vamaxq_f64(vn, vm);
112+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3792,3 +3792,6 @@ def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
37923792

37933793
def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic;
37943794
def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
3795+
// Neon absolute maximum and minimum
3796+
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
3797+
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5992,6 +5992,26 @@ multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<3> opc,
59925992
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
59935993
}
59945994

5995+
let mayRaiseFPException = 1, Uses = [FPCR] in
5996+
multiclass SIMDThreeVectorFP<bit U, bit S, bits<3> opc,
5997+
string asm, SDPatternOperator OpNode> {
5998+
def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64,
5999+
asm, ".4h",
6000+
[(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4i16 V64:$Rm)))]>;
6001+
def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128,
6002+
asm, ".8h",
6003+
[(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8i16 V128:$Rm)))]>;
6004+
def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64,
6005+
asm, ".2s",
6006+
[(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2i32 V64:$Rm)))]>;
6007+
def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128,
6008+
asm, ".4s",
6009+
[(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4i32 V128:$Rm)))]>;
6010+
def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128,
6011+
asm, ".2d",
6012+
[(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2i64 V128:$Rm)))]>;
6013+
}
6014+
59956015
let mayRaiseFPException = 1, Uses = [FPCR] in
59966016
multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<3> opc,
59976017
string asm,

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10134,13 +10134,15 @@ let Uses = [FPMR, FPCR], Predicates = [HasFP8] in {
1013410134
// fminimum(abs(a), abs(b)) -> famin(a, b)
1013510135
// fminnum[nnan](abs(a), abs(b)) -> famin(a, b)
1013610136
def AArch64famin : PatFrags<(ops node:$Rn, node:$Rm),
10137-
[(fminimum (fabs node:$Rn), (fabs node:$Rm)),
10137+
[(int_aarch64_neon_famin node:$Rn, node:$Rm),
10138+
(fminimum (fabs node:$Rn), (fabs node:$Rm)),
1013810139
(fminnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
1013910140

1014010141
// fmaximum(abs(a), abs(b)) -> famax(a, b)
1014110142
// fmaxnum[nnan](abs(a), abs(b)) -> famax(a, b)
1014210143
def AArch64famax : PatFrags<(ops node:$Rn, node:$Rm),
10143-
[(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
10144+
[(int_aarch64_neon_famax node:$Rn, node:$Rm),
10145+
(fmaximum (fabs node:$Rn), (fabs node:$Rm)),
1014410146
(fmaxnum_nnan (fabs node:$Rn), (fabs node:$Rm))]>;
1014510147

1014610148
let Predicates = [HasNEON, HasFAMINMAX] in {
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-linux"
5+
6+
define <4 x half> @test_famin_f16(<4 x half> %vn, <4 x half> %vm) #0 {
7+
; CHECK-LABEL: test_famin_f16:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: famin v0.4h, v0.4h, v1.4h
10+
; CHECK-NEXT: ret
11+
%res = call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> %vn, <4 x half> %vm)
12+
ret <4 x half> %res
13+
}
14+
15+
define <8 x half> @test_famin2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
16+
; CHECK-LABEL: test_famin2_f16:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: famin v0.8h, v0.8h, v1.8h
19+
; CHECK-NEXT: ret
20+
%res = call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> %vn, <8 x half> %vm)
21+
ret <8 x half> %res
22+
}
23+
24+
define <2 x float> @test_famin_f32(<2 x float> %vn, <2 x float> %vm) #0 {
25+
; CHECK-LABEL: test_famin_f32:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: famin v0.2s, v0.2s, v1.2s
28+
; CHECK-NEXT: ret
29+
%res = call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> %vn, <2 x float> %vm)
30+
ret <2 x float> %res
31+
}
32+
33+
define <4 x float> @test_famin2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
34+
; CHECK-LABEL: test_famin2_f32:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: famin v0.4s, v0.4s, v1.4s
37+
; CHECK-NEXT: ret
38+
%res = call <4 x float> @llvm.aarch64.neon.famin.v4f32(<4 x float> %vn, <4 x float> %vm)
39+
ret <4 x float> %res
40+
}
41+
42+
define <2 x double> @test_famin_f64(<2 x double> %vn, <2 x double> %vm) #0 {
43+
; CHECK-LABEL: test_famin_f64:
44+
; CHECK: // %bb.0:
45+
; CHECK-NEXT: famin v0.2d, v0.2d, v1.2d
46+
; CHECK-NEXT: ret
47+
%res = call <2 x double> @llvm.aarch64.neon.famin.v2f64(<2 x double> %vn, <2 x double> %vm)
48+
ret <2 x double> %res
49+
}
50+
51+
define <4 x half> @test_famax_f16(<4 x half> %vn, <4 x half> %vm) #0 {
52+
; CHECK-LABEL: test_famax_f16:
53+
; CHECK: // %bb.0:
54+
; CHECK-NEXT: famax v0.4h, v0.4h, v1.4h
55+
; CHECK-NEXT: ret
56+
%res = call <4 x half> @llvm.aarch64.neon.famax.v4f16(<4 x half> %vn, <4 x half> %vm)
57+
ret <4 x half> %res
58+
}
59+
60+
define <8 x half> @test_famax2_f16(<8 x half> %vn, <8 x half> %vm) #0 {
61+
; CHECK-LABEL: test_famax2_f16:
62+
; CHECK: // %bb.0:
63+
; CHECK-NEXT: famax v0.8h, v0.8h, v1.8h
64+
; CHECK-NEXT: ret
65+
%res = call <8 x half> @llvm.aarch64.neon.famax.v8f16(<8 x half> %vn, <8 x half> %vm)
66+
ret <8 x half> %res
67+
}
68+
69+
define <2 x float> @test_famax_f32(<2 x float> %vn, <2 x float> %vm) #0 {
70+
; CHECK-LABEL: test_famax_f32:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: famax v0.2s, v0.2s, v1.2s
73+
; CHECK-NEXT: ret
74+
%res = call <2 x float> @llvm.aarch64.neon.famax.v2f32(<2 x float> %vn, <2 x float> %vm)
75+
ret <2 x float> %res
76+
}
77+
78+
define <4 x float> @test_famax2_f32(<4 x float> %vn, <4 x float> %vm) #0 {
79+
; CHECK-LABEL: test_famax2_f32:
80+
; CHECK: // %bb.0:
81+
; CHECK-NEXT: famax v0.4s, v0.4s, v1.4s
82+
; CHECK-NEXT: ret
83+
%res = call <4 x float> @llvm.aarch64.neon.famax.v4f32(<4 x float> %vn, <4 x float> %vm)
84+
ret <4 x float> %res
85+
}
86+
87+
define <2 x double> @test_famax_f64(<2 x double> %vn, <2 x double> %vm) #0 {
88+
; CHECK-LABEL: test_famax_f64:
89+
; CHECK: // %bb.0:
90+
; CHECK-NEXT: famax v0.2d, v0.2d, v1.2d
91+
; CHECK-NEXT: ret
92+
%res = call <2 x double> @llvm.aarch64.neon.famax.v2f64(<2 x double> %vn, <2 x double> %vm)
93+
ret <2 x double> %res
94+
}
95+
96+
attributes #0 = { "target-features"="+neon,+faminmax" }

0 commit comments

Comments
 (0)