-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AArch64] Fixup destructive floating-point precision conversions #118788
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
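@llvm/pr-subscribers-mc Author: None (SpencerAbson) Changes: This patch changes the zeroing forms of FCVTXNT, FCVTNT, and BFCVTNT such that their destination operand is also listed as a dag input. These narrowing down-conversions leave the even elements of the destination vector unchanged, regardless of the predicate type. This patch also makes the merging form of BFCVTNT non-movprfx'able.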
Full diff: https://github.com/llvm/llvm-project/pull/118788.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a15e89be1a24b2..b6cb9d54b84aa1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2455,8 +2455,9 @@ let Predicates = [HasBF16, HasSVEorSME] in {
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
- defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
- defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
+
+ defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
+ defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
} // End HasBF16, HasSVEorSME
let Predicates = [HasSVEorSME] in {
@@ -4268,17 +4269,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
- defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
- def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
+ defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
+ def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
// Placing even
- defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
+ defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
// SVE2p2 floating-point convert precision up, zeroing predicate
- defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
+ defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
- def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
- // Placing corresponding
+ def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
// Floating-point convert to integer, zeroing predicate
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 828a048eaf6fb2..3e07048f03907c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2787,10 +2787,12 @@ multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
// SVE2 Floating Point Convert Group
//===----------------------------------------------------------------------===//
-class sve2_fp_convert_precision<bits<4> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
-: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
- asm, "\t$Zd, $Pg/m, $Zn",
+class sve2_fp_convert_precision<bits<4> opc, bit merging, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2, bit destructive=merging>
+: I<(outs zprty1:$Zd),
+ !if(destructive, (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
+ (ins PPR3bAny:$Pg, zprty2:$Zn)),
+ asm, "\t$Zd, " # !if(merging, "$Pg/m", "$Pg/z") # ", $Zn",
"",
[]>, Sched<[]> {
bits<5> Zd;
@@ -2798,74 +2800,55 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
bits<3> Pg;
let Inst{31-24} = 0b01100100;
let Inst{23-22} = opc{3-2};
- let Inst{21-18} = 0b0010;
+ let Inst{21-20} = 0b00;
+ let Inst{19} = merging;
+ let Inst{18} = 0b0;
let Inst{17-16} = opc{1-0};
let Inst{15-13} = 0b101;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
- let Constraints = "$Zd = $_Zd";
+ let Constraints = !if(destructive, "$Zd = $_Zd", "");
let hasSideEffects = 0;
let mayRaiseFPException = 1;
}
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
- def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
- def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+ def _StoH : sve2_fp_convert_precision<0b1000, 0b1, asm, ZPR16, ZPR32>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, 0b1, asm, ZPR32, ZPR64>;
def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve2_fp_convert_up_long<string asm, string op> {
- def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
- def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+ def _HtoS : sve2_fp_convert_precision<0b1001, 0b1, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, 0b1, asm, ZPR64, ZPR32>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
- def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+ def _DtoS : sve2_fp_convert_precision<0b0010, 0b1, asm, ZPR32, ZPR64>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
-class sve_fp_fcvt2z<bits<4> opc, string asm, ZPRRegOp zprty1,
- ZPRRegOp zprty2>
- : I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
- asm, "\t$Zd, $Pg/z, $Zn",
- "",
- []>, Sched<[]> {
- bits<5> Zd;
- bits<5> Zn;
- bits<3> Pg;
- let Inst{31-24} = 0b01100100;
- let Inst{23-22} = opc{3-2};
- let Inst{21-18} = 0b0000;
- let Inst{17-16} = opc{1-0};
- let Inst{15-13} = 0b101;
- let Inst{12-10} = Pg;
- let Inst{9-5} = Zn;
- let Inst{4-0} = Zd;
- let hasSideEffects = 0;
- let mayRaiseFPException = 1;
-}
-
-multiclass sve_fp_fcvtntz<string asm> {
- def _StoH : sve_fp_fcvt2z<0b1000, asm, ZPR16, ZPR32>;
- def _DtoS : sve_fp_fcvt2z<0b1110, asm, ZPR32, ZPR64>;
-}
-
-multiclass sve_fp_fcvtltz<string asm, string op> {
- def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
- def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
+multiclass sve2_fp_convert_up_long_z<string asm, string op> {
+ def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
+multiclass sve2_fp_convert_down_narrow_z<string asm> {
+ def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Floating Point Pairwise Group
//===----------------------------------------------------------------------===//
@@ -9296,33 +9279,18 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
}
-class sve_bfloat_convert<bit N, string asm>
-: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
- asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
- bits<5> Zd;
- bits<3> Pg;
- bits<5> Zn;
- let Inst{31-25} = 0b0110010;
- let Inst{24} = N;
- let Inst{23-13} = 0b10001010101;
- let Inst{12-10} = Pg;
- let Inst{9-5} = Zn;
- let Inst{4-0} = Zd;
+multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperator ir_op> {
+ def NAME : sve_fp_2op_p_zd<0b1001010, asm, ZPR32, ZPR16, ElementSizeS>;
- let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
- let ElementSize = ElementSizeS;
- let hasSideEffects = 0;
- let mayRaiseFPException = 1;
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
-multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
- SDPatternOperator ir_op = null_frag> {
- def NAME : sve_bfloat_convert<N, asm>;
+multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
+ def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
- def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
- def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
index d21a555ff87c60..644fe82ab9409a 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
@@ -20,8 +20,17 @@ bfcvtnt z0.h, p8/m, z1.s
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
movprfx z0.h, p0/m, z7.h
bfcvtnt z0.h, p0/m, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+bfcvtnt z0.h, p7/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bfcvtnt z0.h, p7/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt.s b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
index 5f3b71e28b91e0..b374a27ecfb9ab 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
@@ -9,23 +9,3 @@ bfcvtnt z0.H, p0/m, z1.S
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0.S, p0/m, z2.S
-// CHECK-INST: movprfx z0.s, p0/m, z2.s
-// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0, z2
-// CHECK-INST: movprfx z0, z2
-// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
|
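@llvm/pr-subscribers-backend-aarch64 Author: None (SpencerAbson) Changes: This patch changes the zeroing forms of FCVTXNT, FCVTNT, and BFCVTNT such that their destination operand is also listed as a dag input. These narrowing down-conversions leave the even elements of the destination vector unchanged, regardless of the predicate type. This patch also makes the merging form of BFCVTNT non-movprfx'able.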
Full diff: https://github.com/llvm/llvm-project/pull/118788.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a15e89be1a24b2..b6cb9d54b84aa1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2455,8 +2455,9 @@ let Predicates = [HasBF16, HasSVEorSME] in {
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
- defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
- defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
+
+ defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
+ defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
} // End HasBF16, HasSVEorSME
let Predicates = [HasSVEorSME] in {
@@ -4268,17 +4269,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
- defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
- def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
+ defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
+ def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
// Placing even
- defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
+ defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
// SVE2p2 floating-point convert precision up, zeroing predicate
- defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
+ defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
- def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
- // Placing corresponding
+ def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
// Floating-point convert to integer, zeroing predicate
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 828a048eaf6fb2..3e07048f03907c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2787,10 +2787,12 @@ multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
// SVE2 Floating Point Convert Group
//===----------------------------------------------------------------------===//
-class sve2_fp_convert_precision<bits<4> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
-: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
- asm, "\t$Zd, $Pg/m, $Zn",
+class sve2_fp_convert_precision<bits<4> opc, bit merging, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2, bit destructive=merging>
+: I<(outs zprty1:$Zd),
+ !if(destructive, (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
+ (ins PPR3bAny:$Pg, zprty2:$Zn)),
+ asm, "\t$Zd, " # !if(merging, "$Pg/m", "$Pg/z") # ", $Zn",
"",
[]>, Sched<[]> {
bits<5> Zd;
@@ -2798,74 +2800,55 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
bits<3> Pg;
let Inst{31-24} = 0b01100100;
let Inst{23-22} = opc{3-2};
- let Inst{21-18} = 0b0010;
+ let Inst{21-20} = 0b00;
+ let Inst{19} = merging;
+ let Inst{18} = 0b0;
let Inst{17-16} = opc{1-0};
let Inst{15-13} = 0b101;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
- let Constraints = "$Zd = $_Zd";
+ let Constraints = !if(destructive, "$Zd = $_Zd", "");
let hasSideEffects = 0;
let mayRaiseFPException = 1;
}
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
- def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
- def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+ def _StoH : sve2_fp_convert_precision<0b1000, 0b1, asm, ZPR16, ZPR32>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, 0b1, asm, ZPR32, ZPR64>;
def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve2_fp_convert_up_long<string asm, string op> {
- def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
- def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+ def _HtoS : sve2_fp_convert_precision<0b1001, 0b1, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, 0b1, asm, ZPR64, ZPR32>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
- def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+ def _DtoS : sve2_fp_convert_precision<0b0010, 0b1, asm, ZPR32, ZPR64>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
-class sve_fp_fcvt2z<bits<4> opc, string asm, ZPRRegOp zprty1,
- ZPRRegOp zprty2>
- : I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
- asm, "\t$Zd, $Pg/z, $Zn",
- "",
- []>, Sched<[]> {
- bits<5> Zd;
- bits<5> Zn;
- bits<3> Pg;
- let Inst{31-24} = 0b01100100;
- let Inst{23-22} = opc{3-2};
- let Inst{21-18} = 0b0000;
- let Inst{17-16} = opc{1-0};
- let Inst{15-13} = 0b101;
- let Inst{12-10} = Pg;
- let Inst{9-5} = Zn;
- let Inst{4-0} = Zd;
- let hasSideEffects = 0;
- let mayRaiseFPException = 1;
-}
-
-multiclass sve_fp_fcvtntz<string asm> {
- def _StoH : sve_fp_fcvt2z<0b1000, asm, ZPR16, ZPR32>;
- def _DtoS : sve_fp_fcvt2z<0b1110, asm, ZPR32, ZPR64>;
-}
-
-multiclass sve_fp_fcvtltz<string asm, string op> {
- def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
- def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
+multiclass sve2_fp_convert_up_long_z<string asm, string op> {
+ def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
+multiclass sve2_fp_convert_down_narrow_z<string asm> {
+ def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Floating Point Pairwise Group
//===----------------------------------------------------------------------===//
@@ -9296,33 +9279,18 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
}
-class sve_bfloat_convert<bit N, string asm>
-: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
- asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
- bits<5> Zd;
- bits<3> Pg;
- bits<5> Zn;
- let Inst{31-25} = 0b0110010;
- let Inst{24} = N;
- let Inst{23-13} = 0b10001010101;
- let Inst{12-10} = Pg;
- let Inst{9-5} = Zn;
- let Inst{4-0} = Zd;
+multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperator ir_op> {
+ def NAME : sve_fp_2op_p_zd<0b1001010, asm, ZPR32, ZPR16, ElementSizeS>;
- let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
- let ElementSize = ElementSizeS;
- let hasSideEffects = 0;
- let mayRaiseFPException = 1;
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
-multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
- SDPatternOperator ir_op = null_frag> {
- def NAME : sve_bfloat_convert<N, asm>;
+multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
+ def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
- def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
- def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
index d21a555ff87c60..644fe82ab9409a 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
@@ -20,8 +20,17 @@ bfcvtnt z0.h, p8/m, z1.s
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
movprfx z0.h, p0/m, z7.h
bfcvtnt z0.h, p0/m, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+bfcvtnt z0.h, p7/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bfcvtnt z0.h, p7/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt.s b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
index 5f3b71e28b91e0..b374a27ecfb9ab 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
@@ -9,23 +9,3 @@ bfcvtnt z0.H, p0/m, z1.S
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0.S, p0/m, z2.S
-// CHECK-INST: movprfx z0.s, p0/m, z2.s
-// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0, z2
-// CHECK-INST: movprfx z0, z2
-// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, Spencer, for your patch.
Some of the changes are not related to the purpose of this patch.
For instance, the formatting changes: maybe next time leave these for a separate patch that does not even need review.
There is also some refactoring, and I am not sure whether it could have gone into a separate patch as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch changes the zeroing forms of FCVTXNT, FCVTNT, and BFCVTNT such that their destination operand is also listed as a dag input. These narrowing down-conversions leave the even elements of the destination vector unchanged, regardless of the predicate type.

This patch also makes the merging form of BFCVTNT non-movprfx'able.

- FCVTXNT - Arm Developer
- FCVTNT - Arm Developer
- BFCVTNT - Arm Developer