Skip to content

Commit f3188b9

Browse files
[AArch64][SME2] Add multi-vector frint intrinsics
Adds x2 and x4 vector intrinsics for the following instructions:
- frinta
- frintm
- frintn
- frintp

NOTE: These intrinsics are still in development and are subject to future changes.

Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D143058
1 parent 2937454 commit f3188b9

File tree

3 files changed

+157
-0
lines changed

3 files changed

+157
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3044,6 +3044,13 @@ let TargetPrefix = "aarch64" in {
30443044
def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic;
30453045
def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic;
30463046

3047+
// Multi-vector floating-point round to integral value
// The loop below expands to eight intrinsic records, one x2 and one x4 form
// per suffix: int_aarch64_sve_frint{a,m,n,p}_{x2,x4}.
// NOTE(review): the ZipUzp intrinsic classes are reused here, presumably
// because they already describe the matching N-vectors-in / N-vectors-out
// signature -- confirm against their definitions.
3048+
3049+
foreach inst = ["a", "m", "n", "p"] in {
3050+
def int_aarch64_sve_frint # inst # _x2 : SVE2_VG2_ZipUzp_Intrinsic;
3051+
def int_aarch64_sve_frint # inst # _x4 : SVE2_VG4_ZipUzp_Intrinsic;
3052+
}
3053+
30473054
//
30483055
// Multi-vector min/max
30493056
//

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
367367
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
368368
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
369369
bool IsTupleInput, unsigned Opc);
370+
void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
370371

371372
template <unsigned MaxIdx, unsigned Scale>
372373
void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
@@ -1874,6 +1875,13 @@ void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
18741875
CurDAG->RemoveDeadNode(N);
18751876
}
18761877

1878+
/// Select a multi-vector FRINT* intrinsic node, but only when its first
/// result type is nxv4f32; any other type leaves \p N untouched.
///
/// \param N       The intrinsic node being selected.
/// \param NumVecs Vector count, forwarded to SelectUnaryMultiIntrinsic as its
///                NumOutVecs argument.
/// \param Opcode  Machine opcode forwarded to SelectUnaryMultiIntrinsic.
void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
1879+
unsigned Opcode) {
1880+
// Only the nxv4f32 form is handled here.
// NOTE(review): the callers in Select() return unconditionally after this
// call, so a node of any other type appears to be left unselected -- confirm
// that this is intended (or that other types cannot reach this point).
if (N->getValueType(0) != MVT::nxv4f32)
1881+
return;
1882+
// The literal `true` is the IsTupleInput argument of
// SelectUnaryMultiIntrinsic.
SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
1883+
}
1884+
18771885
void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
18781886
unsigned Op) {
18791887
SDLoc DL(N);
@@ -5380,6 +5388,30 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
53805388
SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
53815389
AArch64::UZP_VG4_4Z4Z_Q);
53825390
return;
5391+
case Intrinsic::aarch64_sve_frinta_x2:
5392+
SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
5393+
return;
5394+
case Intrinsic::aarch64_sve_frinta_x4:
5395+
SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
5396+
return;
5397+
case Intrinsic::aarch64_sve_frintm_x2:
5398+
SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
5399+
return;
5400+
case Intrinsic::aarch64_sve_frintm_x4:
5401+
SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
5402+
return;
5403+
case Intrinsic::aarch64_sve_frintn_x2:
5404+
SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
5405+
return;
5406+
case Intrinsic::aarch64_sve_frintn_x4:
5407+
SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
5408+
return;
5409+
case Intrinsic::aarch64_sve_frintp_x2:
5410+
SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
5411+
return;
5412+
case Intrinsic::aarch64_sve_frintp_x4:
5413+
SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
5414+
return;
53835415
}
53845416
break;
53855417
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
3+
4+
; FRINTA (round to integral, to nearest with ties away from zero)
5+
6+
define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
7+
; CHECK-LABEL: multi_vec_frinta_x2_f32:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: mov z3.d, z2.d
10+
; CHECK-NEXT: mov z2.d, z1.d
11+
; CHECK-NEXT: frinta { z0.s, z1.s }, { z2.s, z3.s }
12+
; CHECK-NEXT: ret
13+
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
14+
ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
15+
}
16+
17+
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frinta_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
18+
; CHECK-LABEL: multi_vec_frinta_x4_f32:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: mov z7.d, z4.d
21+
; CHECK-NEXT: mov z6.d, z3.d
22+
; CHECK-NEXT: mov z5.d, z2.d
23+
; CHECK-NEXT: mov z4.d, z1.d
24+
; CHECK-NEXT: frinta { z0.s - z3.s }, { z4.s - z7.s }
25+
; CHECK-NEXT: ret
26+
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
27+
ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
28+
}
29+
30+
; FRINTM (round to integral, toward minus infinity)
31+
32+
define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
33+
; CHECK-LABEL: multi_vec_frintm_x2_f32:
34+
; CHECK: // %bb.0:
35+
; CHECK-NEXT: mov z3.d, z2.d
36+
; CHECK-NEXT: mov z2.d, z1.d
37+
; CHECK-NEXT: frintm { z0.s, z1.s }, { z2.s, z3.s }
38+
; CHECK-NEXT: ret
39+
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
40+
ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
41+
}
42+
43+
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintm_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
44+
; CHECK-LABEL: multi_vec_frintm_x4_f32:
45+
; CHECK: // %bb.0:
46+
; CHECK-NEXT: mov z7.d, z4.d
47+
; CHECK-NEXT: mov z6.d, z3.d
48+
; CHECK-NEXT: mov z5.d, z2.d
49+
; CHECK-NEXT: mov z4.d, z1.d
50+
; CHECK-NEXT: frintm { z0.s - z3.s }, { z4.s - z7.s }
51+
; CHECK-NEXT: ret
52+
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
53+
ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
54+
}
55+
56+
; FRINTN (round to integral, to nearest with ties to even)
57+
58+
define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
59+
; CHECK-LABEL: multi_vec_frintn_x2_f32:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: mov z3.d, z2.d
62+
; CHECK-NEXT: mov z2.d, z1.d
63+
; CHECK-NEXT: frintn { z0.s, z1.s }, { z2.s, z3.s }
64+
; CHECK-NEXT: ret
65+
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
66+
ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
67+
}
68+
69+
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintn_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
70+
; CHECK-LABEL: multi_vec_frintn_x4_f32:
71+
; CHECK: // %bb.0:
72+
; CHECK-NEXT: mov z7.d, z4.d
73+
; CHECK-NEXT: mov z6.d, z3.d
74+
; CHECK-NEXT: mov z5.d, z2.d
75+
; CHECK-NEXT: mov z4.d, z1.d
76+
; CHECK-NEXT: frintn { z0.s - z3.s }, { z4.s - z7.s }
77+
; CHECK-NEXT: ret
78+
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
79+
ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
80+
}
81+
82+
; FRINTP (round to integral, toward plus infinity)
83+
84+
define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x2_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
85+
; CHECK-LABEL: multi_vec_frintp_x2_f32:
86+
; CHECK: // %bb.0:
87+
; CHECK-NEXT: mov z3.d, z2.d
88+
; CHECK-NEXT: mov z2.d, z1.d
89+
; CHECK-NEXT: frintp { z0.s, z1.s }, { z2.s, z3.s }
90+
; CHECK-NEXT: ret
91+
%res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
92+
ret { <vscale x 4 x float>, <vscale x 4 x float> } %res
93+
}
94+
95+
define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_frintp_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
96+
; CHECK-LABEL: multi_vec_frintp_x4_f32:
97+
; CHECK: // %bb.0:
98+
; CHECK-NEXT: mov z7.d, z4.d
99+
; CHECK-NEXT: mov z6.d, z3.d
100+
; CHECK-NEXT: mov z5.d, z2.d
101+
; CHECK-NEXT: mov z4.d, z1.d
102+
; CHECK-NEXT: frintp { z0.s - z3.s }, { z4.s - z7.s }
103+
; CHECK-NEXT: ret
104+
%res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
105+
ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
106+
}
107+
108+
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
109+
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frinta.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
110+
111+
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
112+
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintm.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
113+
114+
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
115+
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintn.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
116+
117+
declare { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
118+
declare { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.frintp.x4.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)

0 commit comments

Comments
 (0)