Skip to content

Commit 244a560

Browse files
shiltian and arsenm
authored and committed
AMDGPU: MC support for V_CVT_SCALE_SR_FP4 instructions
Co-authored-by: Shilei Tian <[email protected]>
1 parent c8ee1ee commit 244a560

File tree

5 files changed

+205
-1
lines changed

5 files changed

+205
-1
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -336,6 +336,10 @@ class AMDGPUOperand : public MCParsedAsmOperand {
336336
return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
337337
}
338338

339+
// Operand predicate: forwards to isRegOrImmWithInputMods() with the VS_64
// register class and the v2f32 value type, mirroring the v2f16 check that
// sits directly above it (VS_32 / v2f16).
// NOTE(review): presumably the predicate behind PackedF32InputModsMatchClass
// in SIInstrInfo.td -- confirm against PackedFPInputModsMatchClass.
bool isPackedFP32InputMods() const {
340+
return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::v2f32);
341+
}
342+
339343
bool isVReg() const {
340344
return isRegClass(AMDGPU::VGPR_32RegClassID) ||
341345
isRegClass(AMDGPU::VReg_64RegClassID) ||

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1535,6 +1535,7 @@ class PackedIntInputModsMatchClass <int opSize> : AsmOperandClass {
15351535

15361536
def PackedF16InputModsMatchClass : PackedFPInputModsMatchClass<16>;
15371537
def PackedI16InputModsMatchClass : PackedIntInputModsMatchClass<16>;
1538+
def PackedF32InputModsMatchClass : PackedFPInputModsMatchClass<32>;
15381539

15391540
class PackedFPInputMods <PackedFPInputModsMatchClass matchClass> : InputMods <matchClass> {
15401541
let PrintMethod = "printOperandAndFPInputMods";
@@ -1546,6 +1547,7 @@ class PackedIntInputMods <PackedIntInputModsMatchClass matchClass> : InputMods <
15461547

15471548
def PackedF16InputMods : PackedFPInputMods<PackedF16InputModsMatchClass>;
15481549
def PackedI16InputMods : PackedIntInputMods<PackedI16InputModsMatchClass>;
1550+
def PackedF32InputMods : PackedFPInputMods<PackedF32InputModsMatchClass>;
15491551

15501552
def MFMALdScaleModifierOp : TImmLeaf<i32, [{
15511553
return isUInt<2>(Imm);

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -983,6 +983,29 @@ class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<
983983
let HasExtVOP3DPP = 0;
984984
}
985985

986+
// VOP3 op_sel profile parameterized on the src0 value type. The value types
// are [i32, Src0Ty, i32, f32]; this file instantiates it with v2f16 and
// v2bf16 for the v_cvt_scalef32_sr_pk_fp4_{f16,bf16} instructions.
class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
987+
VOP3_Profile<VOPProfile<[i32, Src0Ty, i32, f32]>, VOP3_OPSEL> {
988+
// Custom op_sel operand list: src0 uses packed-F16 input modifiers, src1
// Int32 input modifiers, src2 FP32 input modifiers. The trailing VGPR_32
// $vdst_in is an extra input; the instructions built from this profile are
// defined under Constraints = "$vdst = $vdst_in".
let InsVOP3OpSel = (ins PackedF16InputMods: $src0_modifiers, Src0RC64:$src0,
989+
Int32InputMods: $src1_modifiers, Src1RC64:$src1,
990+
FP32InputMods: $src2_modifiers, Src2RC64:$src2,
991+
VGPR_32:$vdst_in, op_sel0:$op_sel);
992+
// No clamp, no VOP3 DPP variant, and no output modifier; op_sel is enabled.
let HasClamp = 0;
993+
let HasExtVOP3DPP = 0;
994+
let HasOpSel = 1;
995+
let HasOMod = 0;
996+
}
997+
998+
// VOP3 op_sel profile with value types [i32, v2f32, i32, f32]; used in this
// file by v_cvt_scalef32_sr_pk_fp4_f32. Same shape as the F16/BF16 tied-input
// profile, except that src0 is v2f32 and takes PackedF32InputMods.
def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
999+
// Custom op_sel operand list: src0 uses packed-F32 input modifiers, src1
// Int32 input modifiers, src2 FP32 input modifiers. The trailing VGPR_32
// $vdst_in is an extra input; the instruction built from this profile is
// defined under Constraints = "$vdst = $vdst_in".
let InsVOP3OpSel = (ins PackedF32InputMods: $src0_modifiers, Src0RC64:$src0,
1000+
Int32InputMods: $src1_modifiers, Src1RC64:$src1,
1001+
FP32InputMods: $src2_modifiers, Src2RC64:$src2,
1002+
VGPR_32:$vdst_in, op_sel0:$op_sel);
1003+
// No clamp, no VOP3 DPP variant, and no output modifier; op_sel is enabled.
let HasClamp = 0;
1004+
let HasExtVOP3DPP = 0;
1005+
let HasOpSel = 1;
1006+
let HasOMod = 0;
1007+
}
1008+
9861009
class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
9871010
VOP3_OPSEL> {
9881011
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
@@ -1049,8 +1072,12 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
10491072

10501073
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
10511074
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
1052-
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in
1075+
// Tied-input group: each instruction below has its $vdst_in operand
// constrained to $vdst, and $vdst_in is listed in DisableEncoding. The braces
// were added so that the three new SR_PK_FP4 definitions share the constraint
// with the existing V_CVT_SCALEF32_PK_FP4_F32.
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
10531076
defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOP_I32_F32_F32_F32>>;
1077+
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
1078+
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
1079+
defm V_CVT_SCALEF32_SR_PK_FP4_F32: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f32", VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile>;
1080+
}
10541081
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f16>>;
10551082
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
10561083

@@ -2117,6 +2144,9 @@ defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
21172144
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
21182145
defm V_CVT_SCALEF32_PK_FP4_F16 : VOP3OpSel_Real_gfx9_forced_opsel2 <0x24c>;
21192146
defm V_CVT_SCALEF32_PK_FP4_BF16: VOP3OpSel_Real_gfx9_forced_opsel2 <0x24d>;
2147+
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3OpSel_Real_gfx9 <0x24e>;
2148+
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3OpSel_Real_gfx9 <0x24f>;
2149+
defm V_CVT_SCALEF32_SR_PK_FP4_F32: VOP3OpSel_Real_gfx9 <0x23e>;
21202150
}
21212151
let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
21222152
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 96 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1279,3 +1279,99 @@ v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
12791279
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
12801280
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
12811281
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
1282+
1283+
// NOT-GFX950: error: instruction not supported on this GPU
1284+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04]
1285+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5
1286+
1287+
// NOT-GFX950: error: instruction not supported on this GPU
1288+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04]
1289+
v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5
1290+
1291+
// NOT-GFX950: error: instruction not supported on this GPU
1292+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04]
1293+
v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5
1294+
1295+
// NOT-GFX950: error: instruction not supported on this GPU
1296+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04]
1297+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1]
1298+
1299+
// NOT-GFX950: error: instruction not supported on this GPU
1300+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04]
1301+
v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1]
1302+
1303+
// NOT-GFX950: error: instruction not supported on this GPU
1304+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04]
1305+
v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1]
1306+
1307+
// NOT-GFX950: error: instruction not supported on this GPU
1308+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04]
1309+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1]
1310+
1311+
// NOT-GFX950: error: instruction not supported on this GPU
1312+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04]
1313+
v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1]
1314+
1315+
// NOT-GFX950: error: instruction not supported on this GPU
1316+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04]
1317+
v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1]
1318+
1319+
// NOT-GFX950: error: instruction not supported on this GPU
1320+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04]
1321+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0]
1322+
1323+
// NOT-GFX950: error: instruction not supported on this GPU
1324+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04]
1325+
v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0]
1326+
1327+
// NOT-GFX950: error: instruction not supported on this GPU
1328+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04]
1329+
v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0]
1330+
1331+
// NOT-GFX950: error: instruction not supported on this GPU
1332+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24]
1333+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5
1334+
1335+
// NOT-GFX950: error: instruction not supported on this GPU
1336+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84]
1337+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5
1338+
1339+
// NOT-GFX950: error: instruction not supported on this GPU
1340+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04]
1341+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5|
1342+
1343+
// NOT-GFX950: error: instruction not supported on this GPU
1344+
// GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04]
1345+
v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5
1346+
1347+
// NOT-GFX950: error: instruction not supported on this GPU
1348+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24]
1349+
v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5
1350+
1351+
// NOT-GFX950: error: instruction not supported on this GPU
1352+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84]
1353+
v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5
1354+
1355+
// NOT-GFX950: error: instruction not supported on this GPU
1356+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04]
1357+
v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5
1358+
1359+
// NOT-GFX950: error: instruction not supported on this GPU
1360+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04]
1361+
v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5|
1362+
1363+
// NOT-GFX950: error: instruction not supported on this GPU
1364+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24]
1365+
v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5
1366+
1367+
// NOT-GFX950: error: instruction not supported on this GPU
1368+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84]
1369+
v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5
1370+
1371+
// NOT-GFX950: error: instruction not supported on this GPU
1372+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 ; encoding: [0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04]
1373+
v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5
1374+
1375+
// NOT-GFX950: error: instruction not supported on this GPU
1376+
// GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04]
1377+
v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5|

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -942,3 +942,75 @@
942942

943943
# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
944944
0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18
945+
946+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04]
947+
0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x04
948+
949+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04]
950+
0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x04
951+
952+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04]
953+
0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x04
954+
955+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04]
956+
0x00,0x60,0x4f,0xd2,0x02,0x09,0x16,0x04
957+
958+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04]
959+
0x00,0x60,0x4e,0xd2,0x02,0x09,0x16,0x04
960+
961+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,1] ; encoding: [0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04]
962+
0x00,0x60,0x3e,0xd2,0x02,0x09,0x16,0x04
963+
964+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04]
965+
0x00,0x40,0x4f,0xd2,0x02,0x09,0x16,0x04
966+
967+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04]
968+
0x00,0x40,0x4e,0xd2,0x02,0x09,0x16,0x04
969+
970+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,0,1] ; encoding: [0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04]
971+
0x00,0x40,0x3e,0xd2,0x02,0x09,0x16,0x04
972+
973+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04]
974+
0x00,0x20,0x4f,0xd2,0x02,0x09,0x16,0x04
975+
976+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04]
977+
0x00,0x20,0x4e,0xd2,0x02,0x09,0x16,0x04
978+
979+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, v5 op_sel:[0,0,1,0] ; encoding: [0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04]
980+
0x00,0x20,0x3e,0xd2,0x02,0x09,0x16,0x04
981+
982+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24]
983+
0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x24
984+
985+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84]
986+
0x00,0x00,0x4f,0xd2,0x02,0x09,0x16,0x84
987+
988+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04]
989+
0x00,0x04,0x4f,0xd2,0x02,0x09,0x16,0x04
990+
991+
# GFX950: v_cvt_scalef32_sr_pk_fp4_bf16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04]
992+
0x00,0x01,0x4f,0xd2,0x02,0x09,0x16,0x04
993+
994+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, -v2, v4, v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24]
995+
0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x24
996+
997+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, -v5 ; encoding: [0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84]
998+
0x00,0x00,0x4e,0xd2,0x02,0x09,0x16,0x84
999+
1000+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, |v2|, v4, v5 ; encoding: [0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04]
1001+
0x00,0x01,0x4e,0xd2,0x02,0x09,0x16,0x04
1002+
1003+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f16 v0, v2, v4, |v5| ; encoding: [0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04]
1004+
0x00,0x04,0x4e,0xd2,0x02,0x09,0x16,0x04
1005+
1006+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, -v[2:3], v4, v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24]
1007+
0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x24
1008+
1009+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, -v5 ; encoding: [0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84]
1010+
0x00,0x00,0x3e,0xd2,0x02,0x09,0x16,0x84
1011+
1012+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, |v[2:3]|, v4, v5 ; encoding: [0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04]
1013+
0x00,0x01,0x3e,0xd2,0x02,0x09,0x16,0x04
1014+
1015+
# GFX950: v_cvt_scalef32_sr_pk_fp4_f32 v0, v[2:3], v4, |v5| ; encoding: [0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04]
1016+
0x00,0x04,0x3e,0xd2,0x02,0x09,0x16,0x04

0 commit comments

Comments
 (0)