@@ -16093,6 +16093,34 @@ static bool isBFloat2(Type *Ty) {
16093
16093
return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
16094
16094
}
16095
16095
16096
+ /// \returns true if it's valid to emit a native instruction for \p RMW, based
16097
+ /// on the properties of the target memory.
16098
+ static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
16099
+ const AtomicRMWInst *RMW,
16100
+ bool HasSystemScope) {
16101
+ // The remote/fine-grained access logic is different from the integer
16102
+ // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
16103
+ // fine-grained access does not work, even for a device local allocation.
16104
+ //
16105
+ // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
16106
+ // allocations work.
16107
+ if (HasSystemScope) {
16108
+ if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
16109
+ RMW->hasMetadata("amdgpu.no.remote.memory"))
16110
+ return true;
16111
+ } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
16112
+ return true;
16113
+
16114
+ if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
16115
+ return true;
16116
+
16117
+ // TODO: Auto-upgrade this attribute to the metadata in function body and stop
16118
+ // checking it.
16119
+ return RMW->getFunction()
16120
+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
16121
+ .getValueAsBool();
16122
+ }
16123
+
16096
16124
TargetLowering::AtomicExpansionKind
16097
16125
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
16098
16126
unsigned AS = RMW->getPointerAddressSpace();
@@ -16236,37 +16264,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
16236
16264
Type *Ty = RMW->getType();
16237
16265
16238
16266
// LDS float and double fmin/fmax were always supported.
16239
- if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
16240
- return AtomicExpansionKind::None;
16241
-
16242
- if (unsafeFPAtomicsDisabled(RMW->getFunction()))
16243
- return AtomicExpansionKind::CmpXChg;
16244
-
16245
- // Always expand system scope fp atomics.
16246
- if (HasSystemScope)
16247
- return AtomicExpansionKind::CmpXChg;
16267
+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
16268
+ return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
16269
+ : AtomicExpansionKind::CmpXChg;
16270
+ }
16248
16271
16249
- // For flat and global cases:
16250
- // float, double in gfx7. Manual claims denormal support.
16251
- // Removed in gfx8 .
16252
- // float, double restored in gfx10 .
16253
- // double removed again in gfx11, so only f32 for gfx11/gfx12 .
16254
- //
16255
- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
16256
- // f32.
16257
- //
16258
- // FIXME: Check scope and fine grained memory
16259
- if (AS == AMDGPUAS::FLAT_ADDRESS) {
16260
- if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16261
- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16262
- if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16263
- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16264
- } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16265
- AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16266
- if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16267
- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16268
- if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16269
- return ReportUnsafeHWInst(AtomicExpansionKind::None);
16272
+ if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
16273
+ // For flat and global cases:
16274
+ // float, double in gfx7. Manual claims denormal support.
16275
+ // Removed in gfx8.
16276
+ // float, double restored in gfx10.
16277
+ // double removed again in gfx11, so only f32 for gfx11/gfx12.
16278
+ //
16279
+ // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
16280
+ // no f32.
16281
+ if (AS == AMDGPUAS::FLAT_ADDRESS) {
16282
+ if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
16283
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16284
+ if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
16285
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16286
+ } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
16287
+ AS == AMDGPUAS::BUFFER_FAT_POINTER) {
16288
+ if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
16289
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16290
+ if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
16291
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
16292
+ }
16270
16293
}
16271
16294
16272
16295
return AtomicExpansionKind::CmpXChg;
0 commit comments