Skip to content

Commit 302a99a

Browse files
committed
AMDGPU: Add a subtarget feature for fine-grained remote memory support
Atomic access to fine-grained remote memory does not work on all subtargets. Add a feature for targets where this is expected to work.
1 parent 4f80f36 commit 302a99a

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -788,6 +788,16 @@ def FeatureFlatAtomicFaddF32Inst
788788
"Has flat_atomic_add_f32 instruction"
789789
>;
790790

791+
// Subtarget feature "agent-scope-fine-grained-remote-memory-atomics". When
// enabled it sets HasAgentScopeFineGrainedRemoteMemoryAtomics to "true"; the
// description string below states exactly which atomics are covered.
def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
792+
: SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
793+
"HasAgentScopeFineGrainedRemoteMemoryAtomics",
794+
"true",
795+
"Agent (device) scoped atomic operations, excluding those directly "
796+
"supported by PCIe (i.e. integer atomic add, exchange, and "
797+
"compare-and-swap), are functional for allocations in host or peer "
798+
"device memory."
799+
>;
800+
791801
def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero",
792802
"HasDefaultComponentZero",
793803
"true",
@@ -1207,7 +1217,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
12071217
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
12081218
FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
12091219
FeatureMaxHardClauseLength32,
1210-
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
1220+
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
1221+
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
12111222
]
12121223
>;
12131224

@@ -1415,7 +1426,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14151426
FeatureBackOffBarrier,
14161427
FeatureKernargPreload,
14171428
FeatureAtomicFMinFMaxF64GlobalInsts,
1418-
FeatureAtomicFMinFMaxF64FlatInsts
1429+
FeatureAtomicFMinFMaxF64FlatInsts,
1430+
FeatureAgentScopeFineGrainedRemoteMemoryAtomics
14191431
]>;
14201432

14211433
def FeatureISAVersion9_4_0 : FeatureSet<

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
174174
bool HasAtomicBufferPkAddBF16Inst = false;
175175
bool HasFlatAtomicFaddF32Inst = false;
176176
bool HasDefaultComponentZero = false;
177+
bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
177178
bool HasDefaultComponentBroadcast = false;
178179
/// The maximum number of instructions that may be placed within an S_CLAUSE,
179180
/// which is one greater than the maximum argument to S_CLAUSE. A value of 0
@@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
871872

872873
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
873874

875+
/// \return true if agent (device) scope atomic operations work correctly on
876+
/// fine-grained allocations that reside in host or peer PCIe device memory.
877+
/// Reflects the agent-scope-fine-grained-remote-memory-atomics feature.
878+
bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const {
879+
return HasAgentScopeFineGrainedRemoteMemoryAtomics;
880+
}
881+
874882
bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
875883

876884
bool hasDefaultComponentBroadcast() const {

0 commit comments

Comments
 (0)