Skip to content

Commit 1eebc85

Browse files
committed
AMDGPU: Add subtarget features for minimum3/maximum3 instructions
gfx12 and gfx950 managed to produce 3 different permutations of this feature. gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16.
1 parent d6fb34c commit 1eebc85

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
137137
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
138138
>;
139139

140+
def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
141+
"HasMinimum3Maximum3F32",
142+
"true",
143+
"Has v_minimum3_f32 and v_maximum3_f32 instructions"
144+
>;
145+
146+
def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
147+
"HasMinimum3Maximum3F16",
148+
"true",
149+
"Has v_minimum3_f16 and v_maximum3_f16 instructions"
150+
>;
151+
140152
def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
141153
"SupportsXNACK",
142154
"true",
@@ -1263,6 +1275,7 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
12631275
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
12641276
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
12651277
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
1278+
FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
12661279
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
12671280
]
12681281
>;
@@ -2005,6 +2018,15 @@ def isGFX12Plus :
20052018
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
20062019
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
20072020

2021+
def HasMinimum3Maximum3F32 :
2022+
Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
2023+
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;
2024+
2025+
def HasMinimum3Maximum3F16 :
2026+
Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
2027+
AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
2028+
2029+
20082030
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
20092031
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
20102032

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
242242
bool HasForceStoreSC0SC1 = false;
243243
bool HasRequiredExportPriority = false;
244244
bool HasVmemWriteVgprInOrder = false;
245-
245+
bool HasMinimum3Maximum3F32 = false;
246+
bool HasMinimum3Maximum3F16 = false;
246247
bool RequiresCOV6 = false;
247248

248249
// Dummy feature to use for assembler in tablegen.
@@ -1307,6 +1308,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
13071308
/// \returns true if the target has instructions with xf32 format support.
13081309
bool hasXF32Insts() const { return HasXF32Insts; }
13091310

1311+
bool hasMinimum3Maximum3F32() const {
1312+
return HasMinimum3Maximum3F32;
1313+
}
1314+
1315+
bool hasMinimum3Maximum3F16() const {
1316+
return HasMinimum3Maximum3F16;
1317+
}
1318+
13101319
/// \returns The maximum number of instructions that can be enclosed in an
13111320
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
13121321
/// instruction.

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ let mayRaiseFPException = 0 in {
226226
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
227227
} // End mayRaiseFPException = 0
228228

229-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
229+
let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
230230
defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
231231
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
232232
} // End SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0
@@ -625,7 +625,7 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3
625625
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
626626
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
627627

628-
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
628+
let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
629629
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
630630
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
631631
} // End SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0

0 commit comments

Comments
 (0)