Skip to content

Commit 30f386c

Browse files
authored
[AArch64] Fixup destructive floating-point precision conversions (#118788)
This patch changes the zeroing forms of `FCVTXNT`, `FCVTNT`, and `BFCVTNT` such that their destination operand is also listed as a dag input. These narrowing down-conversions place their results in the odd ("top") elements and leave the even elements of the destination vector unchanged, regardless of the predicate type, so the previous contents of the destination are always live into the instruction.

This patch also makes the merging form of `BFCVTNT` non-movprfx'able.

- `FCVTXNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/FCVTXNT--Floating-point-down-convert--rounding-to-odd--top--predicated--?lang=en)
- `FCVTNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/FCVTNT--predicated---Floating-point-down-convert-and-narrow--top--predicated--?lang=en)
- `BFCVTNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/BFCVTNT--Floating-point-down-convert-and-narrow-to-BFloat16--top--predicated--?lang=en)
1 parent ecdc528 commit 30f386c

File tree

4 files changed

+48
-91
lines changed

4 files changed

+48
-91
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,8 +2455,9 @@ let Predicates = [HasBF16, HasSVEorSME] in {
24552455
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
24562456
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
24572457
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
2458-
defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2459-
defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
2458+
2459+
defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
2460+
defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
24602461
} // End HasBF16, HasSVEorSME
24612462

24622463
let Predicates = [HasSVEorSME] in {
@@ -4268,17 +4269,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
42684269
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
42694270

42704271
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
4271-
defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
4272-
def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
4272+
defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
4273+
def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
42734274
// Placing even
4274-
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
4275+
defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
42754276

42764277
// SVE2p2 floating-point convert precision up, zeroing predicate
4277-
defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
4278+
defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
42784279

42794280
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
4280-
def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
4281-
// Placing corresponding
4281+
def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
42824282
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
42834283

42844284
// Floating-point convert to integer, zeroing predicate

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 30 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,85 +2787,68 @@ multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
27872787
// SVE2 Floating Point Convert Group
27882788
//===----------------------------------------------------------------------===//
27892789

2790-
class sve2_fp_convert_precision<bits<4> opc, string asm,
2791-
ZPRRegOp zprty1, ZPRRegOp zprty2>
2792-
: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
2793-
asm, "\t$Zd, $Pg/m, $Zn",
2790+
class sve2_fp_convert_precision<bits<4> opc, bit merging, string asm,
2791+
ZPRRegOp zprty1, ZPRRegOp zprty2, bit destructive=merging>
2792+
: I<(outs zprty1:$Zd),
2793+
!if(destructive, (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
2794+
(ins PPR3bAny:$Pg, zprty2:$Zn)),
2795+
asm, "\t$Zd, " # !if(merging, "$Pg/m", "$Pg/z") # ", $Zn",
27942796
"",
27952797
[]>, Sched<[]> {
27962798
bits<5> Zd;
27972799
bits<5> Zn;
27982800
bits<3> Pg;
27992801
let Inst{31-24} = 0b01100100;
28002802
let Inst{23-22} = opc{3-2};
2801-
let Inst{21-18} = 0b0010;
2803+
let Inst{21-20} = 0b00;
2804+
let Inst{19} = merging;
2805+
let Inst{18} = 0b0;
28022806
let Inst{17-16} = opc{1-0};
28032807
let Inst{15-13} = 0b101;
28042808
let Inst{12-10} = Pg;
28052809
let Inst{9-5} = Zn;
28062810
let Inst{4-0} = Zd;
28072811

2808-
let Constraints = "$Zd = $_Zd";
2812+
let Constraints = !if(destructive, "$Zd = $_Zd", "");
28092813
let hasSideEffects = 0;
28102814
let mayRaiseFPException = 1;
28112815
}
28122816

28132817
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
2814-
def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
2815-
def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
2818+
def _StoH : sve2_fp_convert_precision<0b1000, 0b1, asm, ZPR16, ZPR32>;
2819+
def _DtoS : sve2_fp_convert_precision<0b1110, 0b1, asm, ZPR32, ZPR64>;
28162820

28172821
def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
28182822
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
28192823
}
28202824

28212825
multiclass sve2_fp_convert_up_long<string asm, string op> {
2822-
def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
2823-
def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
2826+
def _HtoS : sve2_fp_convert_precision<0b1001, 0b1, asm, ZPR32, ZPR16>;
2827+
def _StoD : sve2_fp_convert_precision<0b1111, 0b1, asm, ZPR64, ZPR32>;
28242828

28252829
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
28262830
def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
28272831
}
28282832

28292833
multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
2830-
def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
2834+
def _DtoS : sve2_fp_convert_precision<0b0010, 0b1, asm, ZPR32, ZPR64>;
28312835

28322836
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
28332837
}
28342838

2835-
class sve_fp_fcvt2z<bits<4> opc, string asm, ZPRRegOp zprty1,
2836-
ZPRRegOp zprty2>
2837-
: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
2838-
asm, "\t$Zd, $Pg/z, $Zn",
2839-
"",
2840-
[]>, Sched<[]> {
2841-
bits<5> Zd;
2842-
bits<5> Zn;
2843-
bits<3> Pg;
2844-
let Inst{31-24} = 0b01100100;
2845-
let Inst{23-22} = opc{3-2};
2846-
let Inst{21-18} = 0b0000;
2847-
let Inst{17-16} = opc{1-0};
2848-
let Inst{15-13} = 0b101;
2849-
let Inst{12-10} = Pg;
2850-
let Inst{9-5} = Zn;
2851-
let Inst{4-0} = Zd;
2852-
let hasSideEffects = 0;
2853-
let mayRaiseFPException = 1;
2854-
}
2855-
2856-
multiclass sve_fp_fcvtntz<string asm> {
2857-
def _StoH : sve_fp_fcvt2z<0b1000, asm, ZPR16, ZPR32>;
2858-
def _DtoS : sve_fp_fcvt2z<0b1110, asm, ZPR32, ZPR64>;
2859-
}
2860-
2861-
multiclass sve_fp_fcvtltz<string asm, string op> {
2862-
def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
2863-
def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
2839+
multiclass sve2_fp_convert_up_long_z<string asm, string op> {
2840+
def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
2841+
def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
28642842

28652843
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
28662844
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
28672845
}
28682846

2847+
multiclass sve2_fp_convert_down_narrow_z<string asm> {
2848+
def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
2849+
def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
2850+
}
2851+
28692852
//===----------------------------------------------------------------------===//
28702853
// SVE2 Floating Point Pairwise Group
28712854
//===----------------------------------------------------------------------===//
@@ -9311,33 +9294,18 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
93119294
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
93129295
}
93139296

9314-
class sve_bfloat_convert<bit N, string asm>
9315-
: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
9316-
asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
9317-
bits<5> Zd;
9318-
bits<3> Pg;
9319-
bits<5> Zn;
9320-
let Inst{31-25} = 0b0110010;
9321-
let Inst{24} = N;
9322-
let Inst{23-13} = 0b10001010101;
9323-
let Inst{12-10} = Pg;
9324-
let Inst{9-5} = Zn;
9325-
let Inst{4-0} = Zd;
9297+
multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperator ir_op> {
9298+
def NAME : sve_fp_2op_p_zd<0b1001010, asm, ZPR32, ZPR16, ElementSizeS>;
93269299

9327-
let Constraints = "$Zd = $_Zd";
9328-
let DestructiveInstType = DestructiveOther;
9329-
let ElementSize = ElementSizeS;
9330-
let hasSideEffects = 0;
9331-
let mayRaiseFPException = 1;
9300+
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9301+
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9302+
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
93329303
}
93339304

9334-
multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
9335-
SDPatternOperator ir_op = null_frag> {
9336-
def NAME : sve_bfloat_convert<N, asm>;
9305+
multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
9306+
def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
93379307

93389308
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9339-
def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
9340-
def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
93419309
}
93429310

93439311
//===----------------------------------------------------------------------===//

llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,17 @@ bfcvtnt z0.h, p8/m, z1.s
2020
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
2121
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
2222

23+
// --------------------------------------------------------------------------//
24+
// Negative tests for instructions that are incompatible with movprfx
25+
2326
movprfx z0.h, p0/m, z7.h
2427
bfcvtnt z0.h, p0/m, z1.s
25-
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
28+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
2629
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
2730
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
31+
32+
movprfx z0, z7
33+
bfcvtnt z0.h, p7/m, z1.s
34+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
35+
// CHECK-NEXT: bfcvtnt z0.h, p7/m, z1.s
36+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

llvm/test/MC/AArch64/SVE/bfcvtnt.s

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,3 @@ bfcvtnt z0.H, p0/m, z1.S
99
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
1010
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
1111
// CHECK-ERROR: instruction requires: bf16 sve or sme
12-
13-
movprfx z0.S, p0/m, z2.S
14-
// CHECK-INST: movprfx z0.s, p0/m, z2.s
15-
// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
16-
// CHECK-ERROR: instruction requires: sve or sme
17-
18-
bfcvtnt z0.H, p0/m, z1.S
19-
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
20-
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
21-
// CHECK-ERROR: instruction requires: bf16 sve or sme
22-
23-
movprfx z0, z2
24-
// CHECK-INST: movprfx z0, z2
25-
// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
26-
// CHECK-ERROR: instruction requires: sve or sme
27-
28-
bfcvtnt z0.H, p0/m, z1.S
29-
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
30-
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
31-
// CHECK-ERROR: instruction requires: bf16 sve or sme

0 commit comments

Comments
 (0)