@@ -16118,6 +16118,34 @@ static bool isBFloat2(Type *Ty) {
   return VT && VT->getNumElements() == 2 && VT->getElementType()->isBFloatTy();
 }
 
+/// \returns true if it's valid to emit a native instruction for \p RMW, based
+/// on the properties of the target memory.
+static bool globalMemoryFPAtomicIsLegal(const GCNSubtarget &Subtarget,
+                                        const AtomicRMWInst *RMW,
+                                        bool HasSystemScope) {
+  // The remote/fine-grained access logic is different from the integer
+  // atomics. Without AgentScopeFineGrainedRemoteMemoryAtomics support,
+  // fine-grained access does not work, even for a device local allocation.
+  //
+  // With AgentScopeFineGrainedRemoteMemoryAtomics, system scoped device local
+  // allocations work.
+  if (HasSystemScope) {
+    if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics() &&
+        RMW->hasMetadata("amdgpu.no.remote.memory"))
+      return true;
+  } else if (Subtarget.supportsAgentScopeFineGrainedRemoteMemoryAtomics())
+    return true;
+
+  if (RMW->hasMetadata("amdgpu.no.fine.grained.memory"))
+    return true;
+
+  // TODO: Auto-upgrade this attribute to the metadata in function body and
+  // stop checking it.
+  return RMW->getFunction()
+      ->getFnAttribute("amdgpu-unsafe-fp-atomics")
+      .getValueAsBool();
+}
+
 TargetLowering::AtomicExpansionKind
 SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
   unsigned AS = RMW->getPointerAddressSpace();
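For context, both metadata kinds the new helper queries are plain presence checks on the atomicrmw instruction. Below is a minimal sketch, not part of the patch, of how a frontend could attach them through the C++ IRBuilder API; the kernel name, signature, and alignment are invented for illustration, and only the metadata kind strings and the amdgpu-unsafe-fp-atomics attribute name come from the diff above.

// Minimal sketch (not from the patch): attaching the metadata that
// globalMemoryFPAtomicIsLegal() checks. Kernel name/signature are invented.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("example", Ctx);

  // double @kernel(ptr addrspace(1) %p, double %v)
  auto *FTy = FunctionType::get(
      Type::getDoubleTy(Ctx),
      {PointerType::get(Ctx, /*AddressSpace=*/1), Type::getDoubleTy(Ctx)},
      /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "kernel", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  AtomicRMWInst *RMW =
      B.CreateAtomicRMW(AtomicRMWInst::FMax, F->getArg(0), F->getArg(1),
                        MaybeAlign(8), AtomicOrdering::SequentiallyConsistent);

  // The helper only tests hasMetadata(), so an empty node suffices.
  RMW->setMetadata("amdgpu.no.fine.grained.memory", MDNode::get(Ctx, {}));
  RMW->setMetadata("amdgpu.no.remote.memory", MDNode::get(Ctx, {}));

  // Legacy escape hatch that the TODO in the helper plans to retire:
  // F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

  B.CreateRet(RMW);
  M.print(outs(), nullptr);
}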
@@ -16268,37 +16296,32 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
     Type *Ty = RMW->getType();
 
     // LDS float and double fmin/fmax were always supported.
-    if (AS == AMDGPUAS::LOCAL_ADDRESS && (Ty->isFloatTy() || Ty->isDoubleTy()))
-      return AtomicExpansionKind::None;
-
-    if (unsafeFPAtomicsDisabled(RMW->getFunction()))
-      return AtomicExpansionKind::CmpXChg;
-
-    // Always expand system scope fp atomics.
-    if (HasSystemScope)
-      return AtomicExpansionKind::CmpXChg;
+    if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+      return Ty->isFloatTy() || Ty->isDoubleTy() ? AtomicExpansionKind::None
+                                                 : AtomicExpansionKind::CmpXChg;
+    }
 
-    // For flat and global cases:
-    //   float, double in gfx7. Manual claims denormal support.
-    //   Removed in gfx8.
-    //   float, double restored in gfx10.
-    //   double removed again in gfx11, so only f32 for gfx11/gfx12.
-    //
-    // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but no
-    // f32.
-    //
-    // FIXME: Check scope and fine grained memory
-    if (AS == AMDGPUAS::FLAT_ADDRESS) {
-      if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-    } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
-               AS == AMDGPUAS::BUFFER_FAT_POINTER) {
-      if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
-      if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
-        return ReportUnsafeHWInst(AtomicExpansionKind::None);
+    if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
+      // For flat and global cases:
+      //   float, double in gfx7. Manual claims denormal support.
+      //   Removed in gfx8.
+      //   float, double restored in gfx10.
+      //   double removed again in gfx11, so only f32 for gfx11/gfx12.
+      //
+      // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
+      // no f32.
+      if (AS == AMDGPUAS::FLAT_ADDRESS) {
+        if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+        if (Subtarget->hasAtomicFMinFMaxF64FlatInsts() && Ty->isDoubleTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+      } else if (AMDGPU::isExtendedGlobalAddrSpace(AS) ||
+                 AS == AMDGPUAS::BUFFER_FAT_POINTER) {
+        if (Subtarget->hasAtomicFMinFMaxF32GlobalInsts() && Ty->isFloatTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+        if (Subtarget->hasAtomicFMinFMaxF64GlobalInsts() && Ty->isDoubleTy())
+          return ReportUnsafeHWInst(AtomicExpansionKind::None);
+      }
     }
 
     return AtomicExpansionKind::CmpXChg;
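Taken together, the rewritten fmin/fmax case decides in three steps: LDS first (always native for f32/f64), then the per-memory legality gate from the new helper, then per-subtarget instruction availability. A small self-contained mock of that control flow, with every query reduced to a boolean; all names here are illustrative stand-ins, not LLVM's API.

#include <cassert>

// Stand-in for TargetLowering::AtomicExpansionKind.
enum class ExpansionKind { None, CmpXChg };

// Mirrors the order of checks in the rewritten fmin/fmax case: LDS
// short-circuits everything, the memory-legality gate wraps the subtarget
// checks, and anything unhandled falls back to a compare-exchange expansion.
ExpansionKind classifyFMinFMax(bool IsLDS, bool IsF32OrF64, bool MemoryLegal,
                               bool HasNativeInst) {
  if (IsLDS)
    return IsF32OrF64 ? ExpansionKind::None : ExpansionKind::CmpXChg;
  if (MemoryLegal && HasNativeInst)
    return ExpansionKind::None; // Emit the hardware atomic.
  return ExpansionKind::CmpXChg; // Expand to a CAS loop.
}

int main() {
  // Global f64 with fine-grained access ruled out and a native instruction
  // available (a gfx90a-like configuration): keep the hardware atomic.
  assert(classifyFMinFMax(false, true, true, true) == ExpansionKind::None);
  // Same operation when the memory gate fails (e.g. system scope without
  // amdgpu.no.remote.memory): expand.
  assert(classifyFMinFMax(false, true, false, true) == ExpansionKind::CmpXChg);
  // LDS double: always native, regardless of the gate.
  assert(classifyFMinFMax(true, true, false, false) == ExpansionKind::None);
}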