Skip to content

Commit 98b5751

Browse files
arsenm and pravinjagtap committed
AMDGPU: MC support for v_cvt_scalef32_pk32_f32_[fp|bf]6 of gfx950 (llvm#117590)
Co-authored-by: Pravin Jagtap <[email protected]>
1 parent 5bfbf99 commit 98b5751

File tree

10 files changed: +87 -5 lines changed

10 files changed: +87 -5 lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -402,11 +402,17 @@ def FeatureFP4ConversionScaleInsts : SubtargetFeature<"fp4-cvt-scale-insts",
402402
"Has fp4 conversion scale instructions"
403403
>;
404404

405+
def FeatureFP6BF6ConversionScaleInsts : SubtargetFeature<"fp6bf6-cvt-scale-insts",
406+
"HasFP6BF6ConversionScaleInsts",
407+
"true",
408+
"Has fp6 and bf6 conversion scale instructions"
409+
>;
410+
405411
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
406412
"GFX950Insts",
407413
"true",
408414
"Additional instructions for GFX950+",
409-
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts]
415+
[FeaturePermlane16Swap, FeaturePermlane32Swap, FeatureFP8ConversionScaleInsts, FeatureBF8ConversionScaleInsts, FeatureFP4ConversionScaleInsts, FeatureFP6BF6ConversionScaleInsts]
410416
>;
411417

412418
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
@@ -1554,7 +1560,8 @@ def FeatureISAVersion9_5_Common : FeatureSet<
15541560
FeatureBitOp3Insts,
15551561
FeatureFP8ConversionScaleInsts,
15561562
FeatureBF8ConversionScaleInsts,
1557-
FeatureFP4ConversionScaleInsts
1563+
FeatureFP4ConversionScaleInsts,
1564+
FeatureFP6BF6ConversionScaleInsts
15581565
])>;
15591566

15601567
def FeatureISAVersion9_4_0 : FeatureSet<
@@ -2440,6 +2447,9 @@ def HasBF8ConversionScaleInsts : Predicate<"Subtarget->hasBF8ConversionScaleInst
24402447
def HasFP4ConversionScaleInsts : Predicate<"Subtarget->hasFP4ConversionScaleInsts()">,
24412448
AssemblerPredicate<(all_of FeatureFP4ConversionScaleInsts)>;
24422449

2450+
def HasFP6BF6ConversionScaleInsts : Predicate<"Subtarget->hasFP6BF6ConversionScaleInsts()">,
2451+
AssemblerPredicate<(all_of FeatureFP6BF6ConversionScaleInsts)>;
2452+
24432453
def HasGDS : Predicate<"Subtarget->hasGDS()">;
24442454

24452455
def HasGWS : Predicate<"Subtarget->hasGWS()">;

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class AMDGPUSubtarget {
5353
bool HasFP8ConversionScaleInsts = false;
5454
bool HasBF8ConversionScaleInsts = false;
5555
bool HasFP4ConversionScaleInsts = false;
56+
bool HasFP6BF6ConversionScaleInsts = false;
5657
bool EnableRealTrue16Insts = false;
5758
bool HasBF16ConversionInsts = false;
5859
bool HasMadMixInsts = false;
@@ -184,6 +185,8 @@ class AMDGPUSubtarget {
184185

185186
bool hasFP4ConversionScaleInsts() const { return HasFP4ConversionScaleInsts; }
186187

188+
bool hasFP6BF6ConversionScaleInsts() const { return HasFP6BF6ConversionScaleInsts; }
189+
187190
bool hasMadMacF32Insts() const {
188191
return HasMadMacF32Insts || !isGCN();
189192
}

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,6 +1500,7 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
15001500
case OPWV232: return VReg_64RegClassID;
15011501
case OPW96: return VReg_96RegClassID;
15021502
case OPW128: return VReg_128RegClassID;
1503+
case OPW192: return VReg_192RegClassID;
15031504
case OPW160: return VReg_160RegClassID;
15041505
case OPW256: return VReg_256RegClassID;
15051506
case OPW288: return VReg_288RegClassID;

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ class AMDGPUDisassembler : public MCDisassembler {
219219
OPW96,
220220
OPW128,
221221
OPW160,
222+
OPW192,
222223
OPW256,
223224
OPW288,
224225
OPW320,

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1556,7 +1556,8 @@ class getVALUDstForVT<ValueType VT, bit IsTrue16 = 0, bit IsVOP3Encoding = 0> {
15561556
defvar op16 = !if(IsTrue16, !if (IsVOP3Encoding, VOPDstOperand_t16,
15571557
VOPDstOperand_t16Lo128),
15581558
VOPDstOperand<VGPR_32>);
1559-
RegisterOperand ret = !cond(!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
1559+
RegisterOperand ret = !cond(!eq(VT.Size, 1024) : VOPDstOperand<VReg_1024>,
1560+
!eq(VT.Size, 256) : VOPDstOperand<VReg_256>,
15601561
!eq(VT.Size, 128) : VOPDstOperand<VReg_128>,
15611562
!eq(VT.Size, 64) : VOPDstOperand<VReg_64>,
15621563
!eq(VT.Size, 32) : VOPDstOperand<VGPR_32>,
@@ -1612,7 +1613,8 @@ class getSOPSrcForVT<ValueType VT> {
16121613
// Returns the vreg register class to use for source operand given VT
16131614
class getVregSrcForVT<ValueType VT, bit IsTrue16 = 0, bit IsFake16 = 0> {
16141615
RegisterOperand ret =
1615-
!cond(!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
1616+
!cond(!eq(VT.Size, 192) : RegisterOperand<VReg_192>,
1617+
!eq(VT.Size, 128) : RegisterOperand<VReg_128>,
16161618
!eq(VT.Size, 96) : RegisterOperand<VReg_96>,
16171619
!eq(VT.Size, 64) : RegisterOperand<VReg_64>,
16181620
!eq(VT.Size, 48) : RegisterOperand<VReg_64>,
@@ -1645,6 +1647,7 @@ class getVOP3SrcForVT<ValueType VT, bit IsTrue16 = 0> {
16451647
!eq(VT, v2i16) : VSrc_v2b16,
16461648
!eq(VT, v4f16) : AVSrc_64,
16471649
!eq(VT, v4bf16) : AVSrc_64,
1650+
!eq(VT.Size, 192) : VRegSrc_192,
16481651
!eq(VT.Size, 128) : VRegSrc_128,
16491652
!eq(VT.Size, 96) : VRegSrc_96,
16501653
!eq(VT.Size, 64) : VSrc_b64,
@@ -2635,6 +2638,9 @@ def VOP_I32_I32_I32_ARITH : VOPProfile <[i32, i32, i32, untyped], /*EnableClamp=
26352638
def VOP_V2F16_F32_F32 : VOPProfile <[v2f16, f32, f32, untyped]>;
26362639
def VOP_F32_F16_F16_F16 : VOPProfile <[f32, f16, f16, f16]>;
26372640
def VOP_V2BF16_F32_F32 : VOPProfile <[v2bf16, f32, f32, untyped]>;
2641+
def VOP_V32F32_V6I32_F32 : VOPProfile <[v32f32, v6i32, f32, untyped]>;
2642+
def VOP_V32F16_V6I32_F32 : VOPProfile <[v32f16, v6i32, f32, untyped]>;
2643+
def VOP_V32BF16_V6I32_F32 : VOPProfile <[v32bf16, v6i32, f32, untyped]>;
26382644

26392645
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
26402646
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,7 @@ def VRegSrc_32 : SrcReg9<VGPR_32, "OPW32">;
12501250
def VRegSrc_64 : SrcReg9<VReg_64, "OPW64">;
12511251
def VRegSrc_96 : SrcReg9<VReg_96, "OPW96">;
12521252
def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
1253+
def VRegSrc_192: SrcReg9<VReg_192, "OPW192">;
12531254
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
12541255
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
12551256

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,19 @@ def VOP3_CVT_SCALE_PK_FP8BF8_F16BF16_Profile : VOP3_Profile<VOPProfile<[i32, v2f
928928
let HasOMod = 0;
929929
}
930930

931+
class VOP3_CVT_SCALEF32_PK_F864_Profile<VOPProfile P> : VOP3_Profile<P> {
932+
let HasModifiers = 0;
933+
let HasSrc0IntMods = 0;
934+
let HasSrc1IntMods = 0;
935+
let HasOMod = 0;
936+
let HasOpSel = 0;
937+
let HasClamp = 0;
938+
let HasExtDPP = 0;
939+
let HasExt32BitDPP = 0;
940+
let HasExtVOP3DPP = 0;
941+
let HasExt64BitDPP = 0;
942+
}
943+
931944
let SubtargetPredicate = HasFP8ConversionScaleInsts, mayRaiseFPException = 0 in {
932945
defm V_CVT_SCALEF32_F16_FP8 : VOP3Inst<"v_cvt_scalef32_f16_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f16>>;
933946
defm V_CVT_SCALEF32_F32_FP8 : VOP3Inst<"v_cvt_scalef32_f32_fp8", VOP3_CVT_SCALE_F1632_FP8BF8_Profile<f32>>;
@@ -953,6 +966,11 @@ let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in
953966
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3Inst<"v_cvt_scalef32_pk_bf16_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2bf16>>;
954967
}
955968

969+
let SubtargetPredicate = HasFP6BF6ConversionScaleInsts, mayRaiseFPException = 0 in {
970+
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_fp6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
971+
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3Inst<"v_cvt_scalef32_pk32_f32_bf6", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V32F32_V6I32_F32>>;
972+
}
973+
956974
let SubtargetPredicate = isGFX10Plus in {
957975
let isCommutable = 1, isReMaterializable = 1 in {
958976
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1874,3 +1892,7 @@ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3OpSel_Real_gfx9 <0x23d>;
18741892
defm V_CVT_SCALEF32_PK_F16_FP4 : VOP3OpSel_Real_gfx9 <0x250>;
18751893
defm V_CVT_SCALEF32_PK_BF16_FP4 : VOP3OpSel_Real_gfx9 <0x251>;
18761894
}
1895+
let OtherPredicates = [HasFP6BF6ConversionScaleInsts] in {
1896+
defm V_CVT_SCALEF32_PK32_F32_FP6 : VOP3_Real_gfx9<0x256, "v_cvt_scalef32_pk32_f32_fp6">;
1897+
defm V_CVT_SCALEF32_PK32_F32_BF6 : VOP3_Real_gfx9<0x257, "v_cvt_scalef32_pk32_f32_bf6">;
1898+
}

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -884,4 +884,12 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, s3 op_sel:[1,1,0]
884884

885885
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
886886
// GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
887-
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]
887+
v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0]
888+
889+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
890+
// GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
891+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6
892+
893+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
894+
// GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
895+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,27 @@ v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 div:2
125125

126126
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
127127
v_cvt_scalef32_pk_bf16_fp4 v1, v2, v3 clamp div:2
128+
129+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
130+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp
131+
132+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
133+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 mul:2
134+
135+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
136+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 div:2
137+
138+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
139+
v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 clamp div:2
140+
141+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction
142+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp
143+
144+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
145+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 mul:2
146+
147+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
148+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 div:2
149+
150+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: not a valid operand
151+
v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 clamp div:2

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,3 +611,9 @@
611611

612612
# GFX950: v_cvt_scalef32_pk_bf16_fp4 v1, s2, 3 op_sel:[1,1,0] ; encoding: [0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00]
613613
0x01,0x18,0x51,0xd2,0x02,0x06,0x01,0x00
614+
615+
# GFX950: v_cvt_scalef32_pk32_f32_fp6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00]
616+
0x02,0x00,0x56,0xd2,0x02,0x0d,0x02,0x00
617+
618+
# GFX950: v_cvt_scalef32_pk32_f32_bf6 v[2:33], v[2:7], v6 ; encoding: [0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00]
619+
0x02,0x00,0x57,0xd2,0x02,0x0d,0x02,0x00

0 commit comments

Comments
 (0)