-
Notifications
You must be signed in to change notification settings - Fork 13.6k
AMDGPU: Add subtarget features for minimum3/maximum3 instructions #116308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Add subtarget features for minimum3/maximum3 instructions #116308
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changesgfx12 and gfx950 managed to produce 3 different permutations of this feature. Full diff: https://github.com/llvm/llvm-project/pull/116308.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d028c1f5ca7613..35dbf86b7c6f36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -137,6 +137,18 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts",
"Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions"
>;
+def FeatureMinimum3Maximum3F32 : SubtargetFeature<"minimum3-maximum3-f32",
+ "HasMinimum3Maximum3F32",
+ "true",
+ "Has v_minimum3_f32 and v_maximum3_f32 instructions"
+>;
+
+def FeatureMinimum3Maximum3F16 : SubtargetFeature<"minimum3-maximum3-f16",
+ "HasMinimum3Maximum3F16",
+ "true",
+ "Has v_minimum3_f16 and v_maximum3_f16 instructions"
+>;
+
def FeatureSupportsXNACK : SubtargetFeature<"xnack-support",
"SupportsXNACK",
"true",
@@ -1263,6 +1275,7 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureUnalignedDSAccess, FeatureTrue16BitInsts,
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
+ FeatureMinimum3Maximum3F32, FeatureMinimum3Maximum3F16,
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
]
>;
@@ -2005,6 +2018,15 @@ def isGFX12Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX12">,
AssemblerPredicate<(all_of FeatureGFX12Insts)>;
+def HasMinimum3Maximum3F32 :
+ Predicate<"Subtarget->hasMinimum3Maximum3F32()">,
+ AssemblerPredicate<(all_of FeatureMinimum3Maximum3F32)>;
+
+def HasMinimum3Maximum3F16 :
+ Predicate<"Subtarget->hasMinimum3Maximum3F16()">,
+ AssemblerPredicate<(all_of FeatureMinimum3Maximum3F16)>;
+
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<(all_of FeatureFlatAddressSpace)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 1b06756a8a1016..2e7a06a15bd52a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -242,7 +242,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasForceStoreSC0SC1 = false;
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;
-
+ bool HasMinimum3Maximum3F32 = false;
+ bool HasMinimum3Maximum3F16 = false;
bool RequiresCOV6 = false;
// Dummy feature to use for assembler in tablegen.
@@ -1307,6 +1308,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// \returns true if the target has instructions with xf32 format support.
bool hasXF32Insts() const { return HasXF32Insts; }
+ bool hasMinimum3Maximum3F32() const {
+ return HasMinimum3Maximum3F32;
+ }
+
+ bool hasMinimum3Maximum3F16() const {
+ return HasMinimum3Maximum3F16;
+ }
+
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
/// instruction.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 34ecdb56e8689d..551e8b3a679202 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -226,7 +226,7 @@ let mayRaiseFPException = 0 in {
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
} // End mayRaiseFPException = 0
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+let SubtargetPredicate = HasMinimum3Maximum3F32, ReadsModeReg = 0 in {
defm V_MINIMUM3_F32 : VOP3Inst <"v_minimum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfminimum3>;
defm V_MAXIMUM3_F32 : VOP3Inst <"v_maximum3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
@@ -625,7 +625,7 @@ defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
-let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
+let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
|
You can test this locally with the following command:git-clang-format --diff a6fc489bb7a2e9fb3a7f70cccc181e4ee70374bf cde7770eb305155c42e82432084b308f4248723a --extensions h -- llvm/lib/Target/AMDGPU/GCNSubtarget.h View the diff from clang-format here.diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2e7a06a15b..d68177c281 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1308,13 +1308,9 @@ public:
/// \returns true if the target has instructions with xf32 format support.
bool hasXF32Insts() const { return HasXF32Insts; }
- bool hasMinimum3Maximum3F32() const {
- return HasMinimum3Maximum3F32;
- }
+ bool hasMinimum3Maximum3F32() const { return HasMinimum3Maximum3F32; }
- bool hasMinimum3Maximum3F16() const {
- return HasMinimum3Maximum3F16;
- }
+ bool hasMinimum3Maximum3F16() const { return HasMinimum3Maximum3F16; }
/// \returns The maximum number of instructions that can be enclosed in an
/// S_CLAUSE on the given subtarget, or 0 for targets that do not support that
|
b99a4f4
to
1eebc85
Compare
d6fb34c
to
8bee1d6
Compare
1eebc85
to
5097aa7
Compare
c6a6353
to
fd4cc28
Compare
gfx12 and gfx950 managed to produce 3 different permutations of this feature. gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16.
5097aa7
to
cde7770
Compare
…vm#116308) gfx12 and gfx950 managed to produce 3 different permutations of this feature. gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16. Change-Id: I18fa032af449c832fa9a6b099a5ef5039c8e57fb
gfx12 and gfx950 managed to produce 3 different permutations of this feature.
gfx12 supports f32 and f16, and gfx950 supports f32 and v2f16.