Skip to content

Commit 4fa5dbd

Browse files
arsenm and pravinjagtap
committed
AMDGPU: MC support for v_cvt_scalef32_pk_f32_[fp|bf]8 of gfx950. (llvm#117383)
OPSEL[0] selects which source word to read. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 1abbc2a commit 4fa5dbd

File tree

3 files changed

+101
-0
lines changed

3 files changed

+101
-0
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -902,16 +902,31 @@ def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32,
902902
let HasOMod = 0;
903903
}
904904

905+
// Operand profile shared by v_cvt_scalef32_pk_f32_fp8 and
// v_cvt_scalef32_pk_f32_bf8 (both instantiate it further down in this diff).
// Type list is [v2f32, i32, f32, untyped]; the MC tests in this commit use a
// 64-bit VGPR pair as the destination and two source operands.
// NOTE(review): presumably src0 (i32) holds the packed fp8/bf8 data and src1
// (f32) is the scale -- confirm against the ISA documentation.
def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
906+
VOP3_OPSEL> {
907+
// Custom op_sel input list: src0 and src1 each with FP32 input modifiers,
// followed by an op_sel0 operand. Per the commit description, OPSEL[0]
// selects which source word is read.
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
908+
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
909+
op_sel0:$op_sel);
910+
// Feature flags: clamp, src2, src2 modifiers, the extended VOP3 DPP form and
// omod are all turned off; op_sel is the only optional feature enabled.
let HasClamp = 0;
911+
let HasSrc2 = 0;
912+
let HasSrc2Mods = 0;
913+
let HasExtVOP3DPP = 0;
914+
let HasOpSel = 1;
915+
let HasOMod = 0;
916+
}
917+
905918
// VOP3 instruction definitions for the scaled FP8 conversions. Everything in
// this block is gated on HasFP8ConversionScaleInsts and has
// mayRaiseFPException cleared. V_CVT_SCALEF32_PK_F32_FP8 is the entry added by
// this commit and uses VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile.
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
906919
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
907920
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
908921
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
922+
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
909923
}
910924

911925
// VOP3 instruction definitions for the scaled BF8 conversions. Everything in
// this block is gated on HasBF8ConversionScaleInsts and has
// mayRaiseFPException cleared. V_CVT_SCALEF32_PK_F32_BF8 is the entry added by
// this commit and uses VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile.
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
912926
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
913927
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
914928
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
929+
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
915930
}
916931

917932
let SubtargetPredicate = isGFX10Plus in {
@@ -1817,9 +1832,11 @@ let OtherPredicates = [HasFP8ConversionScaleInsts] in {
18171832
defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
18181833
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
18191834
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
1835+
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
18201836
}
18211837
// VOP3OpSel_Real_gfx9 instantiations for the scaled BF8 conversions, gated via
// OtherPredicates on HasBF8ConversionScaleInsts. The template argument is the
// opcode value; 0x23a for V_CVT_SCALEF32_PK_F32_BF8 is the entry added by this
// commit (the MC tests in this commit expect bytes 0x3a,0xd2 for it).
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
18221838
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
18231839
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
18241840
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
1841+
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
18251842
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,3 +549,51 @@ v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
549549
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
550550
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
551551
v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
552+
553+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
554+
// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00]
555+
v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3
556+
557+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
558+
// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00]
559+
v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3
560+
561+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
562+
// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00]
563+
v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3
564+
565+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
566+
// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00]
567+
v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0]
568+
569+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
570+
// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00]
571+
v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0]
572+
573+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
574+
// GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00]
575+
v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0]
576+
577+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
578+
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00]
579+
v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3
580+
581+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
582+
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00]
583+
v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3
584+
585+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
586+
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00]
587+
v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3
588+
589+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
590+
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00]
591+
v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0]
592+
593+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
594+
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00]
595+
v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0]
596+
597+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
598+
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
599+
v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0]

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,3 +359,39 @@
359359

360360
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
361361
0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02
362+
363+
# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00]
364+
0x02,0x00,0x39,0xd2,0x02,0x07,0x02,0x00
365+
366+
# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00]
367+
0x02,0x00,0x39,0xd2,0x02,0x07,0x00,0x00
368+
369+
# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00]
370+
0x02,0x00,0x39,0xd2,0x02,0x06,0x01,0x00
371+
372+
# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00]
373+
0x02,0x08,0x39,0xd2,0x02,0x07,0x02,0x00
374+
375+
# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00]
376+
0x02,0x08,0x39,0xd2,0x02,0x07,0x00,0x00
377+
378+
# GFX950: v_cvt_scalef32_pk_f32_fp8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00]
379+
0x02,0x08,0x39,0xd2,0x02,0x06,0x01,0x00
380+
381+
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00]
382+
0x02,0x00,0x3a,0xd2,0x02,0x07,0x02,0x00
383+
384+
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00]
385+
0x02,0x00,0x3a,0xd2,0x02,0x07,0x00,0x00
386+
387+
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 ; encoding: [0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00]
388+
0x02,0x00,0x3a,0xd2,0x02,0x06,0x01,0x00
389+
390+
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, v3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00]
391+
0x02,0x08,0x3a,0xd2,0x02,0x07,0x02,0x00
392+
393+
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00]
394+
0x02,0x08,0x3a,0xd2,0x02,0x07,0x00,0x00
395+
396+
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
397+
0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00

0 commit comments

Comments
 (0)