Skip to content

Commit c428258

Browse files
arsenm and pravinjagtap committed
AMDGPU: MC support for v_cvt_scalef32_pk_{fp|bf}8_{f|bf}16 of gfx950. (llvm#117384)
OPSEL ASM syntax: opsel:[x,y,z], where opsel[z] = Inst{14} = src0_modifier{3}. Note: the conventional Inst{13}, i.e. OPSEL[2], is ignored in the asm syntax. Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 4fa5dbd commit c428258

File tree

4 files changed

+237
-0
lines changed

4 files changed

+237
-0
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,18 +915,35 @@ def VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32,
915915
let HasOMod = 0;
916916
}
917917

918+
def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f16, f32, untyped]>,
919+
VOP3_OPSEL> {
920+
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
921+
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
922+
op_sel0:$op_sel);
923+
let HasClamp = 0;
924+
let HasSrc2 = 0;
925+
let HasSrc2Mods = 0;
926+
let HasExtVOP3DPP = 0;
927+
let HasOpSel = 1;
928+
let HasOMod = 0;
929+
}
930+
918931
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
919932
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
920933
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
921934
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
922935
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
936+
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
937+
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
923938
}
924939

925940
let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in {
926941
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
927942
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
928943
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP8BF8_F32_Profile>;
929944
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP8BF8_Profile>;
945+
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
946+
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
930947
}
931948

932949
let SubtargetPredicate = isGFX10Plus in {
@@ -1833,10 +1850,14 @@ defm V_CVT_SCALEF32_F16_FP8 : VOP3OpSel_Real_gfx9 <0x24a>;
18331850
defm V_CVT_SCALEF32_F32_FP8 : VOP3OpSel_Real_gfx9 <0x23b>;
18341851
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3OpSel_Real_gfx9 <0x235>;
18351852
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3OpSel_Real_gfx9 <0x239>;
1853+
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3OpSel_Real_gfx9 <0x240>;
1854+
defm V_CVT_SCALEF32_PK_FP8_BF16: VOP3OpSel_Real_gfx9 <0x244>;
18361855
}
18371856
let OtherPredicates = [HasBF8ConversionScaleInsts] in {
18381857
defm V_CVT_SCALEF32_F16_BF8 : VOP3OpSel_Real_gfx9 <0x24b>;
18391858
defm V_CVT_SCALEF32_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23c>;
18401859
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3OpSel_Real_gfx9 <0x236>;
18411860
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3OpSel_Real_gfx9 <0x23a>;
1861+
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3OpSel_Real_gfx9 <0x241>;
1862+
defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
18421863
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,3 +597,99 @@ v_cvt_scalef32_pk_f32_bf8 v[2:3], v2, s3 op_sel:[1,0,0]
597597
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
598598
// GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
599599
v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0]
600+
601+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
602+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00]
603+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3
604+
605+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
606+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20]
607+
v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3|
608+
609+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
610+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00]
611+
v_cvt_scalef32_pk_fp8_f16 v1, s2, 3
612+
613+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
614+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00]
615+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1]
616+
617+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
618+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20]
619+
v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1]
620+
621+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
622+
// GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00]
623+
v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1]
624+
625+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
626+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
627+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3
628+
629+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
630+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20]
631+
v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3|
632+
633+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
634+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00]
635+
v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3
636+
637+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
638+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00]
639+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1]
640+
641+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
642+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20]
643+
v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
644+
645+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
646+
// GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00]
647+
v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1]
648+
649+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
650+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00]
651+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3
652+
653+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
654+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20]
655+
v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3|
656+
657+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
658+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00]
659+
v_cvt_scalef32_pk_bf8_f16 v1, s2, 3
660+
661+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
662+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00]
663+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1]
664+
665+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
666+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20]
667+
v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1]
668+
669+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
670+
// GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00]
671+
v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1]
672+
673+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
674+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00]
675+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3
676+
677+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
678+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20]
679+
v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3|
680+
681+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
682+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00]
683+
v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3
684+
685+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
686+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00]
687+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1]
688+
689+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
690+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20]
691+
v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1]
692+
693+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
694+
// GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
695+
v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1]

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,51 @@ v_permlane16_swap_b32_e32 v1, v2 fi:0
2929

3030
// GFX950: :[[@LINE+1]]:34: error: invalid operand for instruction
3131
v_permlane16_swap_b32_e32 v1, v2 bound_ctrl:1 fi:1
32+
33+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
34+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp
35+
36+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
37+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 mul:2
38+
39+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
40+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 div:2
41+
42+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
43+
v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 clamp div:2
44+
45+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
46+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp
47+
48+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
49+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 mul:2
50+
51+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
52+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 div:2
53+
54+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
55+
v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 clamp div:2
56+
57+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
58+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp
59+
60+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
61+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 mul:2
62+
63+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
64+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 div:2
65+
66+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
67+
v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 clamp div:2
68+
69+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
70+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp
71+
72+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
73+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 mul:2
74+
75+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
76+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 div:2
77+
78+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
79+
v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,3 +395,75 @@
395395

396396
# GFX950: v_cvt_scalef32_pk_f32_bf8 v[2:3], s2, 3 op_sel:[1,0,0] ; encoding: [0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00]
397397
0x02,0x08,0x3a,0xd2,0x02,0x06,0x01,0x00
398+
399+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00]
400+
0x01,0x00,0x40,0xd2,0x02,0x07,0x02,0x00
401+
402+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20]
403+
0x01,0x02,0x40,0xd2,0x02,0x07,0x02,0x20
404+
405+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00]
406+
0x01,0x00,0x40,0xd2,0x02,0x06,0x01,0x00
407+
408+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00]
409+
0x01,0x40,0x40,0xd2,0x02,0x07,0x02,0x00
410+
411+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20]
412+
0x01,0x42,0x40,0xd2,0x02,0x07,0x02,0x20
413+
414+
# GFX950: v_cvt_scalef32_pk_fp8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00]
415+
0x01,0x40,0x40,0xd2,0x02,0x06,0x01,0x00
416+
417+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00]
418+
0x01,0x00,0x44,0xd2,0x02,0x07,0x02,0x00
419+
420+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20]
421+
0x01,0x02,0x44,0xd2,0x02,0x07,0x02,0x20
422+
423+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00]
424+
0x01,0x00,0x44,0xd2,0x02,0x06,0x01,0x00
425+
426+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00]
427+
0x01,0x40,0x44,0xd2,0x02,0x07,0x02,0x00
428+
429+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20]
430+
0x01,0x42,0x44,0xd2,0x02,0x07,0x02,0x20
431+
432+
# GFX950: v_cvt_scalef32_pk_fp8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00]
433+
0x01,0x40,0x44,0xd2,0x02,0x06,0x01,0x00
434+
435+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00]
436+
0x01,0x00,0x41,0xd2,0x02,0x07,0x02,0x00
437+
438+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20]
439+
0x01,0x02,0x41,0xd2,0x02,0x07,0x02,0x20
440+
441+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 ; encoding: [0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00]
442+
0x01,0x00,0x41,0xd2,0x02,0x06,0x01,0x00
443+
444+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00]
445+
0x01,0x40,0x41,0xd2,0x02,0x07,0x02,0x00
446+
447+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20]
448+
0x01,0x42,0x41,0xd2,0x02,0x07,0x02,0x20
449+
450+
# GFX950: v_cvt_scalef32_pk_bf8_f16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00]
451+
0x01,0x40,0x41,0xd2,0x02,0x06,0x01,0x00
452+
453+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00]
454+
0x01,0x00,0x45,0xd2,0x02,0x07,0x02,0x00
455+
456+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| ; encoding: [0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20]
457+
0x01,0x02,0x45,0xd2,0x02,0x07,0x02,0x20
458+
459+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 ; encoding: [0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00]
460+
0x01,0x00,0x45,0xd2,0x02,0x06,0x01,0x00
461+
462+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, v2, v3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00]
463+
0x01,0x40,0x45,0xd2,0x02,0x07,0x02,0x00
464+
465+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, -v2, |v3| op_sel:[0,0,1] ; encoding: [0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20]
466+
0x01,0x42,0x45,0xd2,0x02,0x07,0x02,0x20
467+
468+
# GFX950: v_cvt_scalef32_pk_bf8_bf16 v1, s2, 3 op_sel:[0,0,1] ; encoding: [0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00]
469+
0x01,0x40,0x45,0xd2,0x02,0x06,0x01,0x00

0 commit comments

Comments
 (0)