Skip to content

Commit 88fd2e4

Browse files
[AArch64][SME2] Add multi-vector FP convert from Float to interleaved Half/BFloat intrinsics
Add the following intrinsics: FCVTN, BFCVTN. NOTE: These intrinsics are still in development and are subject to future changes. Reviewed By: david-arm. Differential Revision: https://reviews.llvm.org/D142025
1 parent 86eff6b commit 88fd2e4

File tree

4 files changed

+66
-9
lines changed

4 files changed

+66
-9
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2752,6 +2752,17 @@ let TargetPrefix = "aarch64" in {
27522752
LLVMMatchType<0>, llvm_i32_ty],
27532753
[ImmArg<ArgIndex<6>>]>;
27542754

2755+
class SME2_CVT_VG2_SINGLE_Intrinsic
2756+
: DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
2757+
[llvm_anyvector_ty, LLVMMatchType<0>],
2758+
[IntrNoMem]>;
2759+
2760+
class SME2_CVT_VG2_SINGLE_BF16_Intrinsic
2761+
: DefaultAttrsIntrinsic<[llvm_nxv8bf16_ty],
2762+
[llvm_nxv4f32_ty, llvm_nxv4f32_ty],
2763+
[IntrNoMem]>;
2764+
2765+
27552766
//
27562767
// Multi-vector fused multiply-add/subtract
27572768
//
@@ -2804,4 +2815,10 @@ let TargetPrefix = "aarch64" in {
28042815

28052816
def int_aarch64_sme_suvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
28062817
def int_aarch64_sme_usvdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
2818+
2819+
//
2820+
// Multi-vector floating-point CVT from single-precision to interleaved half-precision/BFloat16
2821+
//
2822+
def int_aarch64_sve_fcvtn_x2 : SME2_CVT_VG2_SINGLE_Intrinsic;
2823+
def int_aarch64_sve_bfcvtn_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic;
28072824
}

llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -363,14 +363,14 @@ defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aar
363363
defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x2>;
364364
defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_vg2x4>;
365365

366-
defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000>;
367-
defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001>;
368-
defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000>;
369-
defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001>;
370-
371-
defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110>;
372-
defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111>;
373-
defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110>;
366+
defm FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b0000, nxv8f16, nxv4f32, null_frag>;
367+
defm FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b0001, nxv8f16, nxv4f32, int_aarch64_sve_fcvtn_x2>;
368+
defm BFCVT_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvt", 0b1000, nxv8bf16, nxv4f32, null_frag>;
369+
defm BFCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"bfcvtn", 0b1001, nxv8bf16, nxv4f32, int_aarch64_sve_bfcvtn_x2>;
370+
371+
defm SQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvt", 0b0110, nxv8i16, nxv4i32, null_frag>;
372+
defm UQCVT_Z2Z_StoH : sme2_cvt_vg2_single<"uqcvt", 0b0111, nxv8i16, nxv4i32, null_frag>;
373+
defm SQCVTU_Z2Z_StoH : sme2_cvt_vg2_single<"sqcvtu", 0b1110, nxv8i16, nxv4i32, null_frag>;
374374
defm SQCVT_Z4Z : sme2_int_cvt_vg4_single<"sqcvt", 0b000>;
375375
defm UQCVT_Z4Z : sme2_int_cvt_vg4_single<"uqcvt", 0b001>;
376376
defm SQCVTU_Z4Z : sme2_int_cvt_vg4_single<"sqcvtu", 0b100>;

llvm/lib/Target/AArch64/SMEInstrFormats.td

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,10 @@ class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic
133133
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
134134
zpr_ty:$Zm, imm_ty:$i)>;
135135

136+
class SME2_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
137+
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
138+
(!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
139+
136140
//===----------------------------------------------------------------------===//
137141
// SME Outer Products
138142
//===----------------------------------------------------------------------===//
@@ -2063,8 +2067,10 @@ class sme2_cvt_vg2_single<string mnemonic, bits<4> op>
20632067

20642068
// SME2 multi-vec FP down convert two registers
20652069
// SME2 multi-vec int down convert two registers
2066-
multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op> {
2070+
multiclass sme2_cvt_vg2_single<string mnemonic, bits<4> op, ValueType out_vt,
2071+
ValueType in_vt, SDPatternOperator intrinsic> {
20672072
def NAME : sme2_cvt_vg2_single<mnemonic, op>;
2073+
def : SME2_Cvt_VG2_Pat<NAME, intrinsic, out_vt, in_vt>;
20682074
}
20692075

20702076
class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty,
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -verify-machineinstrs < %s | FileCheck %s
3+
4+
;
5+
; FCVTN
6+
;
7+
define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
8+
; CHECK-LABEL: multi_vector_cvtn_x2_f16:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
11+
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
12+
; CHECK-NEXT: fcvtn z0.h, { z0.s, z1.s }
13+
; CHECK-NEXT: ret
14+
%res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
15+
ret <vscale x 8 x half> %res
16+
}
17+
18+
;
19+
; BFCVTN
20+
;
21+
22+
define <vscale x 8 x bfloat> @multi_vector_bfcvtn_x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
23+
; CHECK-LABEL: multi_vector_bfcvtn_x2:
24+
; CHECK: // %bb.0:
25+
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
26+
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
27+
; CHECK-NEXT: bfcvtn z0.h, { z0.s, z1.s }
28+
; CHECK-NEXT: ret
29+
%res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
30+
ret <vscale x 8 x bfloat> %res
31+
}
32+
33+
declare <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
34+
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float>, <vscale x 4 x float>)

0 commit comments

Comments
 (0)