Skip to content

Commit 1abbc2a

Browse files
arsenm and pravinjagtap committed
AMDGPU: MC support for v_cvt_scalef32_pk_{fp8|bf8}_f32 of gfx950. (llvm#117382)
OPSEL[3] selects low/high 16 bits of dest write. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent e49f849 commit 1abbc2a

File tree

3 files changed

+100
-0
lines changed

3 files changed

+100
-0
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,14 +890,28 @@ class VOP3_CVT_SCALE_F1632_FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPPro
890890
let HasOMod = 0;
891891
}
892892

893+
// Operand profile used by v_cvt_scalef32_pk_fp8_f32 and
// v_cvt_scalef32_pk_bf8_f32: an i32 result and three f32 sources, built on
// VOP3_Profile with the VOP3_OPSEL feature set.
def VOP3_CVT_SCALE_FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f32, f32]>,
894+
VOP3_OPSEL> {
895+
// Explicit op_sel input list: each of the three sources is paired with an
// FP32InputMods modifier operand, and op_sel is the final operand.
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
896+
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
897+
FP32InputMods:$src2_modifiers, Src2RC64:$src2,
898+
op_sel0:$op_sel);
899+
// Clamp, the extended VOP3 DPP form and output modifiers are switched off;
// only op_sel is enabled. Per the commit description, OPSEL[3] selects the
// low/high 16 bits of the destination write.
let HasClamp = 0;
900+
let HasExtVOP3DPP = 0;
901+
let HasOpSel = 1;
902+
let HasOMod = 0;
903+
}
904+
893905
// Scaled conversions involving FP8. Every defm in this scope gets
// SubtargetPredicate = HasFP8ConversionScaleInsts and mayRaiseFPException = 0.
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
894906
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
895907
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
908+
// The pk_fp8_f32 form uses VOP3_CVT_SCALE_FP8BF8_F32_Profile (i32 result,
// three f32 sources) rather than the F1632 profile class used above.
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
896909
}
897910

898911
// Scaled conversions involving BF8. Every defm in this scope gets
// SubtargetPredicate = HasBF8ConversionScaleInsts and mayRaiseFPException = 0.
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
899912
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
900913
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
914+
// The pk_bf8_f32 form uses VOP3_CVT_SCALE_FP8BF8_F32_Profile (i32 result,
// three f32 sources) rather than the F1632 profile class used above.
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
901915
}
902916

903917
let SubtargetPredicate = isGFX10Plus in {
@@ -1802,8 +1816,10 @@ defm V_BITOP3_B32 : VOP3_Real_BITOP3_gfx9<0x234, "v_bitop3_b32">;
18021816
// Real (encoded) definitions for the FP8 scaled conversions, restricted via
// OtherPredicates to subtargets with HasFP8ConversionScaleInsts.
// NOTE(review): the hex argument is presumably the VOP3 opcode; 0x235 shows up
// as the 0x35 byte in the expected encodings of the MC tests - confirm.
let OtherPredicates = [HasFP8ConversionScaleInsts] in {
18031817
defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
18041818
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
1819+
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
18051820
}
18061821
// Real (encoded) definitions for the BF8 scaled conversions, restricted via
// OtherPredicates to subtargets with HasBF8ConversionScaleInsts.
// NOTE(review): the hex argument is presumably the VOP3 opcode; 0x236 shows up
// as the 0x36 byte in the expected encodings of the MC tests - confirm.
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
18071822
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
18081823
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
1824+
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
18091825
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,3 +501,51 @@ v_cvt_scalef32_f32_bf8 v1, 22, v3 op_sel:[1,0,1]
501501
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
502502
// GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00]
503503
v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1]
504+
505+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
506+
// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04]
507+
v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3
508+
509+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
510+
// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44]
511+
v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3|
512+
513+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
514+
// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02]
515+
v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3
516+
517+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
518+
// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04]
519+
v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1]
520+
521+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
522+
// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44]
523+
v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
524+
525+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
526+
// GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02]
527+
v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]
528+
529+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
530+
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04]
531+
v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3
532+
533+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
534+
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44]
535+
v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3|
536+
537+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
538+
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02]
539+
v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3
540+
541+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
542+
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04]
543+
v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1]
544+
545+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
546+
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44]
547+
v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1]
548+
549+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
550+
// GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
551+
v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1]

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,39 @@
323323

324324
# GFX950: v_cvt_scalef32_f32_bf8 v1, 44, v3 op_sel:[1,1,1] ; encoding: [0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00]
325325
0x01,0x58,0x3c,0xd2,0xac,0x06,0x02,0x00
326+
327+
# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04]
328+
0x01,0x00,0x35,0xd2,0x01,0x05,0x0e,0x04
329+
330+
# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44]
331+
0x01,0x04,0x35,0xd2,0x01,0x05,0x0e,0x44
332+
333+
# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02]
334+
0x01,0x00,0x35,0xd2,0x01,0x05,0x0c,0x02
335+
336+
# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04]
337+
0x01,0x40,0x35,0xd2,0x01,0x05,0x0e,0x04
338+
339+
# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44]
340+
0x01,0x44,0x35,0xd2,0x01,0x05,0x0e,0x44
341+
342+
# GFX950: v_cvt_scalef32_pk_fp8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02]
343+
0x01,0x40,0x35,0xd2,0x01,0x05,0x0c,0x02
344+
345+
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04]
346+
0x01,0x00,0x36,0xd2,0x01,0x05,0x0e,0x04
347+
348+
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| ; encoding: [0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44]
349+
0x01,0x04,0x36,0xd2,0x01,0x05,0x0e,0x44
350+
351+
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 ; encoding: [0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02]
352+
0x01,0x00,0x36,0xd2,0x01,0x05,0x0c,0x02
353+
354+
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, v2, v3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04]
355+
0x01,0x40,0x36,0xd2,0x01,0x05,0x0e,0x04
356+
357+
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, -v2, |v3| op_sel:[0,0,0,1] ; encoding: [0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44]
358+
0x01,0x44,0x36,0xd2,0x01,0x05,0x0e,0x44
359+
360+
# GFX950: v_cvt_scalef32_pk_bf8_f32 v1, v1, s2, 3 op_sel:[0,0,0,1] ; encoding: [0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02]
361+
0x01,0x40,0x36,0xd2,0x01,0x05,0x0c,0x02

0 commit comments

Comments
 (0)