Skip to content

Commit 8d6bf9b

Browse files
arsenm and pravinjagtap
committed
AMDGPU: MC support for v_cvt_scalef32_pk_fp4_{f|bf}16 on gfx950. (llvm#117594)
These instructions have non-standard use of OPSEL bits to select dest write byte. The src2_modifiers operand is used without having its corresponding src2 operand by introducing dummy src2. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 369ae61 commit 8d6bf9b

File tree

5 files changed

+123
-1
lines changed

5 files changed

+123
-1
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8703,7 +8703,9 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
87038703

87048704
const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
87058705

8706-
if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8706+
if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
8707+
Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
8708+
Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
87078709
Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
87088710
Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
87098711
Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,23 @@ def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f3
902902
let HasOMod = 0;
903903
}
904904

905+
// VOP3 op_sel profile used by v_cvt_scalef32_pk_fp4_f16 and
// v_cvt_scalef32_pk_fp4_bf16. The operand list declares a $src2 register and
// a $src2_modifiers slot even though HasSrc2 is 0: the instructions only have
// two real sources, and $src2 acts as a dummy operand so that src2_modifiers
// can be used.
def VOP3_CVT_SCALE_FP4_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f16, f32, f32]>,
906+
VOP3_OPSEL> {
907+
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
908+
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
909+
FP32InputMods:$src2_modifiers, VGPR_32:$src2,
910+
op_sel0:$op_sel);
911+
let HasClamp = 0;
912+
// No real third source, but its modifiers operand is kept.
let HasSrc2 = 0;
913+
let HasSrc2Mods = 1;
914+
let HasOpSel = 1;
915+
// Build the three-source op_sel asm string, then remove the
// ", $src2_modifiers" piece from it.
let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
916+
getAsmVOP3OpSel<3, HasClamp, HasOMod,
917+
HasSrc0FloatMods, HasSrc1FloatMods,
918+
HasSrc2FloatMods>.ret);
919+
let HasExtVOP3DPP = 0;
920+
}
921+
905922
class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
906923
VOP3_OPSEL> {
907924
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
@@ -968,6 +985,13 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
968985
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
969986
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
970987
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
988+
989+
// These instructions have non-standard use of op_sel. In particular they are
990+
// using op_sel bits 2 and 3 while only having two sources.
991+
// Tie $vdst to the dummy $src2 operand and leave $src2 out of the encoding
// for both the f16 and bf16 variants, which share one profile.
let Constraints = "$vdst = $src2", DisableEncoding = "$src2" in {
992+
defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f16", VOP3_CVT_SCALE_FP4_F16BF16_Profile>;
993+
defm V_CVT_SCALEF32_PK_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp4_bf16", VOP3_CVT_SCALE_FP4_F16BF16_Profile>;
994+
}
971995
}
972996

973997
let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in {
@@ -1910,6 +1934,8 @@ defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
19101934
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
19111935
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
19121936
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
1937+
defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>;
1938+
defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>;
19131939
}
19141940
let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
19151941
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,3 +1025,43 @@ v_cvt_scalef32_pk_bf16_bf8 v1, v2, s3 op_sel:[1,0,0]
10251025
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
10261026
// GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00]
10271027
v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0]
1028+
1029+
// NOT-GFX950: error: instruction not supported on this GPU
1030+
// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
1031+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3
1032+
1033+
// NOT-GFX950: error: instruction not supported on this GPU
1034+
// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00]
1035+
v_cvt_scalef32_pk_fp4_f16 v1, s2, 3
1036+
1037+
// NOT-GFX950: error: instruction not supported on this GPU
1038+
// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00]
1039+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1]
1040+
1041+
// NOT-GFX950: error: instruction not supported on this GPU
1042+
// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00]
1043+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1]
1044+
1045+
// NOT-GFX950: error: instruction not supported on this GPU
1046+
// GFX950: v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20]
1047+
v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3
1048+
1049+
// NOT-GFX950: error: instruction not supported on this GPU
1050+
// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00]
1051+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3
1052+
1053+
// NOT-GFX950: error: instruction not supported on this GPU
1054+
// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00]
1055+
v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3
1056+
1057+
// NOT-GFX950: error: instruction not supported on this GPU
1058+
// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00]
1059+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1]
1060+
1061+
// NOT-GFX950: error: instruction not supported on this GPU
1062+
// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00]
1063+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1]
1064+
1065+
// NOT-GFX950: error: instruction not supported on this GPU
1066+
// GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20]
1067+
v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,3 +293,27 @@ v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 div:2
293293

294294
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
295295
v_cvt_scalef32_pk_bf16_bf8 v[20:25], v[10:25], v8 clamp div:2
296+
297+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
298+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 clamp
299+
300+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
301+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 mul:2
302+
303+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
304+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 div:2
305+
306+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
307+
v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 clamp div:2
308+
309+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
310+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 clamp
311+
312+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
313+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 mul:2
314+
315+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
316+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 div:2
317+
318+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
319+
v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,3 +713,33 @@
713713

714714
# GFX950: v_cvt_scalef32_pk_bf16_bf8 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00]
715715
0x01,0x08,0x6a,0xd2,0x02,0x06,0x01,0x00
716+
717+
# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
718+
0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00
719+
720+
# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00]
721+
0x01,0x00,0x4c,0xd2,0x02,0x06,0x01,0x00
722+
723+
# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00]
724+
0x01,0x60,0x4c,0xd2,0x02,0x07,0x02,0x00
725+
726+
# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00]
727+
0x01,0x40,0x4c,0xd2,0x02,0x07,0x02,0x00
728+
729+
# GFX950: v_cvt_scalef32_pk_fp4_f16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20]
730+
0x01,0x01,0x4c,0xd2,0x02,0x06,0x02,0x20
731+
732+
# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00]
733+
0x01,0x00,0x4d,0xd2,0x02,0x07,0x02,0x00
734+
735+
# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00]
736+
0x01,0x00,0x4d,0xd2,0x02,0x06,0x01,0x00
737+
738+
# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00]
739+
0x01,0x60,0x4d,0xd2,0x02,0x07,0x02,0x00
740+
741+
# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00]
742+
0x01,0x40,0x4d,0xd2,0x02,0x07,0x02,0x00
743+
744+
# GFX950: v_cvt_scalef32_pk_fp4_bf16 v1, -|s2|, v3 ; encoding: [0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20]
745+
0x01,0x01,0x4d,0xd2,0x02,0x06,0x02,0x20

0 commit comments

Comments
 (0)