Skip to content

Commit 1dffe4c

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Add minimum3/maximum3 pkf16 for gfx950 encodings (llvm#117601)
1 parent 727f9a0 commit 1dffe4c

File tree

6 files changed

+189
-1
lines changed

6 files changed

+189
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,12 @@ def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
149149
"Has v_minimum3_f16 and v_maximum3_f16 instructions"
150150
>;
151151

152+
def FeatureMinimum3Maximum3PKF16 : SubtargetFeature<"minimum3-maximum3-pkf16",
153+
"HasMinimum3Maximum3PKF16",
154+
"true",
155+
"Has v_pk_minimum3_f16 and v_pk_maximum3_f16 instructions"
156+
>;
157+
152158
def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
153159
"SupportsXNACK",
154160
"true",
@@ -432,7 +438,8 @@ def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
432438
FeatureFP4ConversionScaleInsts,
433439
FeatureFP6BF6ConversionScaleInsts,
434440
FeatureF16BF16ToFP6BF6ConversionScaleInsts,
435-
FeatureMinimum3Maximum3F32
441+
FeatureMinimum3Maximum3F32,
442+
FeatureMinimum3Maximum3PKF16
436443
]
437444
>;
438445

@@ -2147,6 +2154,10 @@ def HasMinimum3Maximum3F16 :
21472154
Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
21482155
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
21492156

2157+
def HasMinimum3Maximum3PKF16 :
2158+
Predicate<"Subtarget->hasMinimum3Maximum3PKF16()">,
2159+
AssemblerPredicate<(all_of FeatureMinimum3Maximum3PKF16)>;
2160+
21502161

21512162
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
21522163
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
251251
bool HasAshrPkInsts = false;
252252
bool HasMinimum3Maximum3F32 = false;
253253
bool HasMinimum3Maximum3F16 = false;
254+
bool HasMinimum3Maximum3PKF16 = false;
254255

255256
bool RequiresCOV6 = false;
256257

@@ -1345,6 +1346,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13451346
return HasMinimum3Maximum3F16;
13461347
}
13471348

1349+
bool hasMinimum3Maximum3PKF16() const {
1350+
return HasMinimum3Maximum3PKF16;
1351+
}
1352+
13481353
/// \returns The maximum number of instructions that can be enclosed in an
13491354
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
13501355
/// instruction.

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
144144
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
145145
} // End SubtargetPredicate = HasVOP3PInsts
146146

147+
let SubtargetPredicate = HasMinimum3Maximum3PKF16, FPDPRounding = 1 in {
148+
defm V_PK_MINIMUM3_F16 : VOP3PInst<"v_pk_minimum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
149+
defm V_PK_MAXIMUM3_F16 : VOP3PInst<"v_pk_maximum3_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>;
150+
}
151+
147152
// TODO: Make sure we're doing the right thing with denormals. Note
148153
// that FMA and MAD will differ.
149154
multiclass MadFmaMixPats<SDPatternOperator fma_like,
@@ -2053,6 +2058,9 @@ defm V_PK_MUL_F16 : VOP3P_Real_vi <0x10>;
20532058
defm V_PK_MIN_F16 : VOP3P_Real_vi <0x11>;
20542059
defm V_PK_MAX_F16 : VOP3P_Real_vi <0x12>;
20552060

2061+
defm V_PK_MINIMUM3_F16 : VOP3P_Real_vi <0x1b>;
2062+
defm V_PK_MAXIMUM3_F16 : VOP3P_Real_vi <0x1c>;
2063+
20562064
let OtherPredicates = [HasMadMixInsts] in {
20572065
defm V_MAD_MIX_F32 : VOP3P_Real_vi <0x20>;
20582066
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;

llvm/test/MC/AMDGPU/gfx950_asm_features.s

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1182,3 +1182,100 @@ v_maximum3_f32 v1, v2, s8, v3
11821182
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
11831183
// GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
11841184
v_minimum3_f32 v0, v1, v2, v3
1185+
1186+
1187+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1188+
// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
1189+
v_pk_minimum3_f16 v1, v2, v3, v4
1190+
1191+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1192+
// GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
1193+
v_pk_minimum3_f16 v1, v2, v3, 2.0
1194+
1195+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1196+
// GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
1197+
v_pk_minimum3_f16 v1, v2, 2.0, v3
1198+
1199+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1200+
// GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
1201+
v_pk_minimum3_f16 v1, 2.0, v2, v3
1202+
1203+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1204+
// GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
1205+
v_pk_minimum3_f16 v1, v2, v3, v4 clamp
1206+
1207+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1208+
// GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
1209+
v_pk_minimum3_f16 v8, v0, s8, v1
1210+
1211+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1212+
// GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
1213+
v_pk_minimum3_f16 v8, v0, v1, s8
1214+
1215+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1216+
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
1217+
v_pk_minimum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
1218+
1219+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1220+
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
1221+
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
1222+
1223+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1224+
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
1225+
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
1226+
1227+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1228+
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
1229+
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
1230+
1231+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1232+
// GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
1233+
v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
1234+
1235+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1236+
// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
1237+
v_pk_maximum3_f16 v1, v2, v3, v4
1238+
1239+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1240+
// GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
1241+
v_pk_maximum3_f16 v1, v2, v3, 2.0
1242+
1243+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1244+
// GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
1245+
v_pk_maximum3_f16 v1, v2, 2.0, v3
1246+
1247+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1248+
// GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
1249+
v_pk_maximum3_f16 v1, 2.0, v2, v3
1250+
1251+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1252+
// GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
1253+
v_pk_maximum3_f16 v1, v2, v3, v4 clamp
1254+
1255+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1256+
// GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
1257+
v_pk_maximum3_f16 v8, v0, s8, v1
1258+
1259+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1260+
// GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
1261+
v_pk_maximum3_f16 v8, v0, v1, s8
1262+
1263+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1264+
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
1265+
v_pk_maximum3_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
1266+
1267+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1268+
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
1269+
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
1270+
1271+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1272+
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
1273+
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
1274+
1275+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1276+
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
1277+
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
1278+
1279+
// NOT-GFX950: :[[@LINE+2]]:{{[0-9]+}}: error:
1280+
// GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
1281+
v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]

llvm/test/MC/AMDGPU/gfx950_err.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,3 +386,9 @@ v_minimum3_f32 v0, s1, s2, v3
386386

387387
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: literal operands are not supported
388388
v_minimum3_f32 v0, v1, v2, 0xdeadbeef
389+
390+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
391+
v_pk_minimum3_f16 v0, s1, s2, v3
392+
393+
// GFX950: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand (violates constant bus restrictions)
394+
v_pk_maximum3_f16 v0, s1, s2, v3

llvm/test/MC/Disassembler/AMDGPU/gfx950_dasm_vop3.txt

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -881,3 +881,64 @@
881881

882882
# GFX950: v_minimum3_f32 v0, v1, v2, v3 ; encoding: [0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04]
883883
0x00,0x00,0xa8,0xd2,0x01,0x05,0x0e,0x04
884+
885+
886+
# GFX950: v_pk_maximum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c]
887+
0x01,0x40,0x9c,0xd3,0xf4,0x04,0x0e,0x1c
888+
889+
# GFX950: v_pk_maximum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c]
890+
0x01,0x40,0x9c,0xd3,0x02,0xe9,0x0d,0x1c
891+
892+
# GFX950: v_pk_maximum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b]
893+
0x01,0x40,0x9c,0xd3,0x02,0x07,0xd2,0x1b
894+
895+
# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c]
896+
0x01,0x40,0x9c,0xd3,0x02,0x07,0x12,0x1c
897+
898+
# GFX950: v_pk_maximum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c]
899+
0x01,0xc0,0x9c,0xd3,0x02,0x07,0x12,0x1c
900+
901+
# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c]
902+
0x08,0x40,0x9c,0xd3,0x00,0x01,0x04,0x1c
903+
904+
# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04]
905+
0x08,0x60,0x9c,0xd3,0x00,0x01,0x04,0x04
906+
907+
# GFX950: v_pk_maximum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04]
908+
0x08,0x00,0x9c,0xd3,0x00,0x01,0x04,0x04
909+
910+
# GFX950: v_pk_maximum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c]
911+
0x08,0x40,0x9c,0xd3,0x00,0x11,0x04,0x1c
912+
913+
# GFX950: v_pk_maximum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18]
914+
0x08,0x40,0x9c,0xd3,0x00,0x03,0x22,0x18
915+
916+
# GFX950: v_pk_minimum3_f16 v1, 2.0, v2, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c]
917+
0x01,0x40,0x9b,0xd3,0xf4,0x04,0x0e,0x1c
918+
919+
# GFX950: v_pk_minimum3_f16 v1, v2, 2.0, v3 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c]
920+
0x01,0x40,0x9b,0xd3,0x02,0xe9,0x0d,0x1c
921+
922+
# GFX950: v_pk_minimum3_f16 v1, v2, v3, 2.0 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b]
923+
0x01,0x40,0x9b,0xd3,0x02,0x07,0xd2,0x1b
924+
925+
# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 ; encoding: [0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c]
926+
0x01,0x40,0x9b,0xd3,0x02,0x07,0x12,0x1c
927+
928+
# GFX950: v_pk_minimum3_f16 v1, v2, v3, v4 clamp ; encoding: [0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c]
929+
0x01,0xc0,0x9b,0xd3,0x02,0x07,0x12,0x1c
930+
931+
# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c]
932+
0x08,0x40,0x9b,0xd3,0x00,0x01,0x04,0x1c
933+
934+
# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04]
935+
0x08,0x60,0x9b,0xd3,0x00,0x01,0x04,0x04
936+
937+
# GFX950: v_pk_minimum3_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04]
938+
0x08,0x00,0x9b,0xd3,0x00,0x01,0x04,0x04
939+
940+
# GFX950: v_pk_minimum3_f16 v8, v0, s8, v1 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c]
941+
0x08,0x40,0x9b,0xd3,0x00,0x11,0x04,0x1c
942+
943+
# GFX950: v_pk_minimum3_f16 v8, v0, v1, s8 ; encoding: [0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18]
944+
0x08,0x40,0x9b,0xd3,0x00,0x03,0x22,0x18

0 commit comments

Comments (0)