Skip to content

Commit 5bfbf99

Browse files
arsenm and pravinjagtap committed
AMDGPU: MC support for v_cvt_scalef32_pk_{f|bf}16_fp4 of gfx950. (llvm#117418)
OPSEL ASM syntax for v_cvt_scalef32_pk_{f|bf}16_fp4: op_sel:[x,y,z], where x and y (i.e. OPSEL[1:0]) select which source byte to read. Note: the conventional Inst{13}, i.e. OPSEL[2], is ignored in the asm syntax.
Co-authored-by: Pravin Jagtap <[email protected]>
Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 7aa8396 commit 5bfbf99

File tree

4 files changed

+201
-5
lines changed

4 files changed

+201
-5
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -902,7 +902,7 @@ def VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, f3
902902
let HasOMod = 0;
903903
}
904904

905-
def VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile : VOP3_Profile<VOPProfile<[v2f32, i32, f32, untyped]>,
905+
class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
906906
VOP3_OPSEL> {
907907
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
908908
FP32InputMods:$src1_modifiers, Src1RC64:$src1,
@@ -932,7 +932,7 @@ let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in
932932
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
933933
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
934934
defm V_CVT_SCALEF32_PK_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
935-
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
935+
defm V_CVT_SCALEF32_PK_F32_FP8 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
936936
defm V_CVT_SCALEF32_PK_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
937937
defm V_CVT_SCALEF32_PK_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_fp8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
938938
}
@@ -941,14 +941,16 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
941941
defm V_CVT_SCALEF32_F16_BF8 : VOP3Inst<"v_cvt_scalef32_f16_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
942942
defm V_CVT_SCALEF32_F32_BF8 : VOP3Inst<"v_cvt_scalef32_f32_bf8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
943943
defm V_CVT_SCALEF32_PK_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
944-
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
944+
defm V_CVT_SCALEF32_PK_F32_BF8 : VOP3Inst<"v_cvt_scalef32_pk_f32_bf8", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
945945
defm V_CVT_SCALEF32_PK_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_f16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
946946
defm V_CVT_SCALEF32_PK_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk_bf8_bf16", VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile>;
947947
}
948948

949949
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
950-
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F32_FP4FP8BF8_Profile>;
950+
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
951951
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_Profile>;
952+
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
953+
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
952954
}
953955

954956
let SubtargetPredicate = isGFX10Plus in {
@@ -1869,4 +1871,6 @@ defm V_CVT_SCALEF32_PK_BF8_BF16: VOP3OpSel_Real_gfx9 <0x245>;
18691871
let OtherPredicates = [HasFP4ConversionScaleInsts] in {
18701872
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3OpSel_Real_gfx9 <0x23f>;
18711873
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
1874+
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
1875+
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
18721876
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -788,4 +788,100 @@ v_cvt_scalef32_pk_fp4_f32 v1, v1, -v2, |v3| op_sel:[0,0,1,1]
788788

789789
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
790790
// GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
791-
v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1]
791+
v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1]
792+
793+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
794+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
795+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3
796+
797+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
798+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00]
799+
v_cvt_scalef32_pk_f16_fp4 v1, v2, s3
800+
801+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
802+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00]
803+
v_cvt_scalef32_pk_f16_fp4 v1, s2, 3
804+
805+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
806+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00]
807+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0]
808+
809+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
810+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00]
811+
v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0]
812+
813+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
814+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00]
815+
v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0]
816+
817+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
818+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00]
819+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0]
820+
821+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
822+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00]
823+
v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0]
824+
825+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
826+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00]
827+
v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0]
828+
829+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
830+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00]
831+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0]
832+
833+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
834+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00]
835+
v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0]
836+
837+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
838+
// GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00]
839+
v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0]
840+
841+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
842+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00]
843+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3
844+
845+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
846+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00]
847+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3
848+
849+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
850+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00]
851+
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3
852+
853+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
854+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00]
855+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0]
856+
857+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
858+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00]
859+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0]
860+
861+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
862+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00]
863+
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0]
864+
865+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
866+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00]
867+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0]
868+
869+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
870+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00]
871+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0]
872+
873+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
874+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00]
875+
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0]
876+
877+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
878+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00]
879+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0]
880+
881+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
882+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00]
883+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0]
884+
885+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
886+
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
887+
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,27 @@ v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 div:2
101101

102102
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
103103
v_cvt_scalef32_pk_fp4_f32 v1, v1, v2, v3 clamp div:2
104+
105+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
106+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp
107+
108+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
109+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 mul:2
110+
111+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
112+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 div:2
113+
114+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
115+
v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 clamp div:2
116+
117+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
118+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp
119+
120+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
121+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 mul:2
122+
123+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
124+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2
125+
126+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
127+
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,3 +539,75 @@
539539

540540
# GFX950: v_cvt_scalef32_pk_fp4_f32 v1, v1, s2, 3 op_sel:[0,0,1,1] ; encoding: [0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02]
541541
0x01,0x60,0x3d,0xd2,0x01,0x05,0x0c,0x02
542+
543+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
544+
0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00
545+
546+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00]
547+
0x01,0x00,0x50,0xd2,0x02,0x07,0x00,0x00
548+
549+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00]
550+
0x01,0x00,0x50,0xd2,0x02,0x06,0x01,0x00
551+
552+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00]
553+
0x01,0x08,0x50,0xd2,0x02,0x07,0x02,0x00
554+
555+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00]
556+
0x01,0x08,0x50,0xd2,0x02,0x07,0x00,0x00
557+
558+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00]
559+
0x01,0x08,0x50,0xd2,0x02,0x06,0x01,0x00
560+
561+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00]
562+
0x01,0x10,0x50,0xd2,0x02,0x07,0x02,0x00
563+
564+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00]
565+
0x01,0x10,0x50,0xd2,0x02,0x07,0x00,0x00
566+
567+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00]
568+
0x01,0x10,0x50,0xd2,0x02,0x06,0x01,0x00
569+
570+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00]
571+
0x01,0x18,0x50,0xd2,0x02,0x07,0x02,0x00
572+
573+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00]
574+
0x01,0x18,0x50,0xd2,0x02,0x07,0x00,0x00
575+
576+
# GFX950: v_cvt_scalef32_pk_f16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00]
577+
0x01,0x18,0x50,0xd2,0x02,0x06,0x01,0x00
578+
579+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00]
580+
0x01,0x00,0x51,0xd2,0x02,0x07,0x02,0x00
581+
582+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00]
583+
0x01,0x00,0x51,0xd2,0x02,0x07,0x00,0x00
584+
585+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 ; encoding: [0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00]
586+
0x01,0x00,0x51,0xd2,0x02,0x06,0x01,0x00
587+
588+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00]
589+
0x01,0x08,0x51,0xd2,0x02,0x07,0x02,0x00
590+
591+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00]
592+
0x01,0x08,0x51,0xd2,0x02,0x07,0x00,0x00
593+
594+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,0,0] ; encoding: [0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00]
595+
0x01,0x08,0x51,0xd2,0x02,0x06,0x01,0x00
596+
597+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00]
598+
0x01,0x10,0x51,0xd2,0x02,0x07,0x02,0x00
599+
600+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00]
601+
0x01,0x10,0x51,0xd2,0x02,0x07,0x00,0x00
602+
603+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[0,1,0] ; encoding: [0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00]
604+
0x01,0x10,0x51,0xd2,0x02,0x06,0x01,0x00
605+
606+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00]
607+
0x01,0x18,0x51,0xd2,0x02,0x07,0x02,0x00
608+
609+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00]
610+
0x01,0x18,0x51,0xd2,0x02,0x07,0x00,0x00
611+
612+
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
613+
0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00

0 commit comments

Comments
 (0)