Commit ee78697

tclin914 and circYuan authored
[RISCV] Support LLVM IR intrinsics for XAndesVPackFPH (#139860)
This patch adds LLVM IR intrinsic support for XAndesVPackFPH.

The documentation for the intrinsics can be found at
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/intrinsic_funcs.adoc#andes-vector-packed-fp16-extensionxandesvpackfph
and, for the policy variants, at
https://github.com/andestech/andes-vector-intrinsic-doc/blob/ast-v5_4_0-release-v5/auto-generated/andes-v5/policy_funcs/intrinsic_funcs.adoc#andes-vector-packed-fp16-extensionxandesvpackfph

The clang part will be added in a later patch.

Co-authored-by: Tony Chuan-Yue Yuan <[email protected]>
1 parent 2a8960e commit ee78697
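For reference, the new intrinsics are exposed in IR as llvm.riscv.nds.vfpmadt and llvm.riscv.nds.vfpmadb. The unmasked form takes a passthru vector, an FP16 source vector, an f32 scalar, a rounding-mode operand, and a vector length, mirroring the declarations in the new test further down. A minimal sketch of the declared shape for one element count, with the test's iXLen placeholder spelled as i64 (RV64); llvm.riscv.nds.vfpmadt has the same signature:

declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.nxv4f16.f32(
  <vscale x 4 x half>,    ; passthru
  <vscale x 4 x half>,    ; FP16 source vector
  float,                  ; f32 scalar operand
  i64,                    ; rounding mode (frm encoding, e.g. 0 = rne)
  i64)                    ; vector length (vl)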

5 files changed: 655 additions, 1 deletion

llvm/include/llvm/IR/IntrinsicsRISCV.td

Lines changed: 1 addition & 0 deletions
@@ -1891,3 +1891,4 @@ let TargetPrefix = "riscv" in {
 include "llvm/IR/IntrinsicsRISCVXTHead.td"
 include "llvm/IR/IntrinsicsRISCVXsf.td"
 include "llvm/IR/IntrinsicsRISCVXCV.td"
+include "llvm/IR/IntrinsicsRISCVXAndes.td"
llvm/include/llvm/IR/IntrinsicsRISCVXAndes.td

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+//===- IntrinsicsRISCVXAndes.td - Andes intrinsics ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Andes vendor intrinsics for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "riscv" in {
+  // Andes Vector Packed FP16 Extension
+  defm nds_vfpmadt : RISCVBinaryAAXRoundingMode;
+  defm nds_vfpmadb : RISCVBinaryAAXRoundingMode;
+}
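Because the two defm lines instantiate the generic RISCVBinaryAAXRoundingMode multiclass, each intrinsic also gets a masked variant with an extra mask operand and a trailing policy operand, matching the declarations in the test file below. A sketch of the masked declaration for one element count (again with iXLen spelled as i64 for RV64):

declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32(
  <vscale x 4 x half>,    ; merge/passthru
  <vscale x 4 x half>,    ; FP16 source vector
  float,                  ; f32 scalar operand
  <vscale x 4 x i1>,      ; mask
  i64, i64, i64)          ; rounding mode, vl, policy

The tests call the masked form with policy operand 1, which the autogenerated checks lower with a "ta, mu" vtype.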

llvm/lib/Target/RISCV/RISCVInstrInfoXAndes.td

Lines changed: 39 additions & 1 deletion
@@ -361,6 +361,33 @@ class NDSRVInstVD4DOT<bits<6> funct6, string opcodestr>
   let RVVConstraint = VMConstraint;
 }
 
+//===----------------------------------------------------------------------===//
+// Multiclass
+//===----------------------------------------------------------------------===//
+
+let fprclass = !cast<RegisterClass>("FPR32") in
+def SCALAR_F16_FPR32 : FPR_Info<16>;
+
+let hasSideEffects = 0 in
+multiclass VPseudoVFPMAD_VF_RM {
+  foreach m = SCALAR_F16_FPR32.MxList in {
+    defm "" : VPseudoBinaryV_VF_RM<m, SCALAR_F16_FPR32, 0>,
+              SchedBinary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
+                          m.MX, SCALAR_F16_FPR32.SEW, forcePassthruRead=true>;
+  }
+}
+
+multiclass VPatVFPMADBinaryV_VX_RM<string intrinsic, string instruction,
+                                   list<VTypeInfo> vtilist> {
+  foreach vti = vtilist in {
+    defvar kind = "V"#vti.ScalarSuffix;
+    defm : VPatBinaryRoundingMode<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+                                  vti.Vector, vti.Vector, f32, vti.Mask,
+                                  vti.Log2SEW, vti.RegClass,
+                                  vti.RegClass, FPR32>;
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // XAndesPerf
 //===----------------------------------------------------------------------===//
@@ -433,7 +460,9 @@ def NDS_VD4DOTSU_VV : NDSRVInstVD4DOT<0b000101, "nds.vd4dotsu">;
 }
 } // DecoderNamespace = "XAndes"
 
-// Patterns
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
 
 let Predicates = [HasVendorXAndesPerf] in {
 
@@ -461,3 +490,12 @@ def : Sh1AddPat<NDS_LEA_H_ZE>;
 def : Sh2AddPat<NDS_LEA_W_ZE>;
 def : Sh3AddPat<NDS_LEA_D_ZE>;
 } // Predicates = [HasVendorXAndesPerf, IsRV64]
+
+let Predicates = [HasVendorXAndesVPackFPH],
+    mayRaiseFPException = true in {
+defm PseudoNDS_VFPMADT : VPseudoVFPMAD_VF_RM;
+defm PseudoNDS_VFPMADB : VPseudoVFPMAD_VF_RM;
+} // Predicates = [HasVendorXAndesVPackFPH]
+
+defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadt", "PseudoNDS_VFPMADT", AllFP16Vectors>;
+defm : VPatVFPMADBinaryV_VX_RM<"int_riscv_nds_vfpmadb", "PseudoNDS_VFPMADB", AllFP16Vectors>;
Lines changed: 299 additions & 0 deletions
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64x,+xandesvpackfph \
+; RUN:   -verify-machineinstrs -target-abi=ilp32f | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64x,+xandesvpackfph \
+; RUN:   -verify-machineinstrs -target-abi=lp64f | FileCheck %s
+
+declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  float,
+  iXLen, iXLen);
+
+define <vscale x 1 x half> @intrinsic_vfpmadb_vf_nxv1f16_nxv1f16_f32(<vscale x 1 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv1f16_nxv1f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.nxv1f16.f32(
+    <vscale x 1 x half> undef,
+    <vscale x 1 x half> %0,
+    float %1, iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.mask.nxv1f16.f32(
+  <vscale x 1 x half>,
+  <vscale x 1 x half>,
+  float,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x half> @intrinsic_vfpmadb_mask_vf_nxv1f16_nxv1f16_f32(<vscale x 1 x half> %0, <vscale x 1 x half> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv1f16_nxv1f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 1 x half> @llvm.riscv.nds.vfpmadb.mask.nxv1f16.f32(
+    <vscale x 1 x half> %0,
+    <vscale x 1 x half> %1,
+    float %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.nxv2f16.f32(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  float,
+  iXLen, iXLen);
+
+define <vscale x 2 x half> @intrinsic_vfpmadb_vf_nxv2f16_nxv2f16_f32(<vscale x 2 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv2f16_nxv2f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.nxv2f16.f32(
+    <vscale x 2 x half> undef,
+    <vscale x 2 x half> %0,
+    float %1, iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.mask.nxv2f16.f32(
+  <vscale x 2 x half>,
+  <vscale x 2 x half>,
+  float,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x half> @intrinsic_vfpmadb_mask_vf_nxv2f16_nxv2f16_f32(<vscale x 2 x half> %0, <vscale x 2 x half> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv2f16_nxv2f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 2 x half> @llvm.riscv.nds.vfpmadb.mask.nxv2f16.f32(
+    <vscale x 2 x half> %0,
+    <vscale x 2 x half> %1,
+    float %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.nxv4f16.f32(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  float,
+  iXLen, iXLen);
+
+define <vscale x 4 x half> @intrinsic_vfpmadb_vf_nxv4f16_nxv4f16_f32(<vscale x 4 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv4f16_nxv4f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.nxv4f16.f32(
+    <vscale x 4 x half> undef,
+    <vscale x 4 x half> %0,
+    float %1, iXLen 0, iXLen %2)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32(
+  <vscale x 4 x half>,
+  <vscale x 4 x half>,
+  float,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x half> @intrinsic_vfpmadb_mask_vf_nxv4f16_nxv4f16_f32(<vscale x 4 x half> %0, <vscale x 4 x half> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv4f16_nxv4f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 4 x half> @llvm.riscv.nds.vfpmadb.mask.nxv4f16.f32(
+    <vscale x 4 x half> %0,
+    <vscale x 4 x half> %1,
+    float %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.nxv8f16.f32(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  float,
+  iXLen, iXLen);
+
+define <vscale x 8 x half> @intrinsic_vfpmadb_vf_nxv8f16_nxv8f16_f32(<vscale x 8 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv8f16_nxv8f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.nxv8f16.f32(
+    <vscale x 8 x half> undef,
+    <vscale x 8 x half> %0,
+    float %1, iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.mask.nxv8f16.f32(
+  <vscale x 8 x half>,
+  <vscale x 8 x half>,
+  float,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x half> @intrinsic_vfpmadb_mask_vf_nxv8f16_nxv8f16_f32(<vscale x 8 x half> %0, <vscale x 8 x half> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv8f16_nxv8f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 8 x half> @llvm.riscv.nds.vfpmadb.mask.nxv8f16.f32(
+    <vscale x 8 x half> %0,
+    <vscale x 8 x half> %1,
+    float %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.nxv16f16.f32(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  float,
+  iXLen, iXLen);
+
+define <vscale x 16 x half> @intrinsic_vfpmadb_vf_nxv16f16_nxv16f16_f32(<vscale x 16 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv16f16_nxv16f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.nxv16f16.f32(
+    <vscale x 16 x half> undef,
+    <vscale x 16 x half> %0,
+    float %1, iXLen 0, iXLen %2)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.mask.nxv16f16.f32(
+  <vscale x 16 x half>,
+  <vscale x 16 x half>,
+  float,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x half> @intrinsic_vfpmadb_mask_vf_nxv16f16_nxv16f16_f32(<vscale x 16 x half> %0, <vscale x 16 x half> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv16f16_nxv16f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v12, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 16 x half> @llvm.riscv.nds.vfpmadb.mask.nxv16f16.f32(
+    <vscale x 16 x half> %0,
+    <vscale x 16 x half> %1,
+    float %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.nxv32f16.f32(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  float,
+  iXLen, iXLen);
+
+define <vscale x 32 x half> @intrinsic_vfpmadb_vf_nxv32f16_nxv32f16_f32(<vscale x 32 x half> %0, float %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_vf_nxv32f16_nxv32f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v8
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.nxv32f16.f32(
+    <vscale x 32 x half> undef,
+    <vscale x 32 x half> %0,
+    float %1, iXLen 0, iXLen %2)
+
+  ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.mask.nxv32f16.f32(
+  <vscale x 32 x half>,
+  <vscale x 32 x half>,
+  float,
+  <vscale x 32 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 32 x half> @intrinsic_vfpmadb_mask_vf_nxv32f16_nxv32f16_f32(<vscale x 32 x half> %0, <vscale x 32 x half> %1, float %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfpmadb_mask_vf_nxv32f16_nxv32f16_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    nds.vfpmadb.vf v8, fa0, v16, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = tail call <vscale x 32 x half> @llvm.riscv.nds.vfpmadb.mask.nxv32f16.f32(
+    <vscale x 32 x half> %0,
+    <vscale x 32 x half> %1,
+    float %2,
+    <vscale x 32 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 32 x half> %a
+}
