Skip to content

Commit 7f448a1

Browse files
arsenm authored and AlexisPerry committed
AMDGPU: Remove ds_fmin/ds_fmax intrinsics (llvm#96739)
These have been replaced with atomicrmw.
1 parent 28587dc commit 7f448a1

File tree

12 files changed

+65
-2154
lines changed

12 files changed

+65
-2154
lines changed

llvm/docs/ReleaseNotes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ Changes to the AMDGPU Backend
132132

133133
* Implemented :ref:`llvm.get.rounding <int_get_rounding>` and :ref:`llvm.set.rounding <int_set_rounding>`
134134

135+
* Removed ``llvm.amdgcn.ds.fadd``, ``llvm.amdgcn.ds.fmin`` and
136+
``llvm.amdgcn.ds.fmax`` intrinsics. Users should use the
137+
:ref:`atomicrmw <i_atomicrmw>` instruction with ``fadd``, ``fmin`` and
138+
``fmax`` with addrspace(3) instead.
139+
135140
Changes to the ARM Backend
136141
--------------------------
137142

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -523,17 +523,6 @@ def int_amdgcn_fmad_ftz :
523523
[IntrNoMem, IntrSpeculatable]
524524
>;
525525

526-
class AMDGPULDSIntrin :
527-
Intrinsic<[llvm_any_ty],
528-
[LLVMQualPointerType<3>,
529-
LLVMMatchType<0>,
530-
llvm_i32_ty, // ordering
531-
llvm_i32_ty, // scope
532-
llvm_i1_ty], // isVolatile
533-
[IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
534-
ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]
535-
>;
536-
537526
// FIXME: The m0 argument should be moved after the normal arguments
538527
class AMDGPUDSOrderedIntrinsic : Intrinsic<
539528
[llvm_i32_ty],
@@ -571,9 +560,6 @@ def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
571560
def int_amdgcn_ds_append : AMDGPUDSAppendConsumedIntrinsic;
572561
def int_amdgcn_ds_consume : AMDGPUDSAppendConsumedIntrinsic;
573562

574-
def int_amdgcn_ds_fmin : AMDGPULDSIntrin;
575-
def int_amdgcn_ds_fmax : AMDGPULDSIntrin;
576-
577563
} // TargetPrefix = "amdgcn"
578564

579565
// New-style image intrinsics

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,8 +1033,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10331033
break; // No other 'amdgcn.atomic.*'
10341034
}
10351035

1036-
if (Name.starts_with("ds.fadd")) {
1037-
// Replaced with atomicrmw fadd, so there's no new declaration.
1036+
if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||
1037+
Name.starts_with("ds.fmax")) {
1038+
// Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1039+
// declaration.
10381040
NewFn = nullptr;
10391041
return true;
10401042
}
@@ -2347,6 +2349,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
23472349
AtomicRMWInst::BinOp RMWOp =
23482350
StringSwitch<AtomicRMWInst::BinOp>(Name)
23492351
.StartsWith("ds.fadd", AtomicRMWInst::FAdd)
2352+
.StartsWith("ds.fmin", AtomicRMWInst::FMin)
2353+
.StartsWith("ds.fmax", AtomicRMWInst::FMax)
23502354
.StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
23512355
.StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);
23522356

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5401,35 +5401,6 @@ bool AMDGPULegalizerInfo::legalizeRsqClampIntrinsic(MachineInstr &MI,
54015401
return true;
54025402
}
54035403

5404-
static unsigned getDSFPAtomicOpcode(Intrinsic::ID IID) {
5405-
switch (IID) {
5406-
case Intrinsic::amdgcn_ds_fmin:
5407-
return AMDGPU::G_ATOMICRMW_FMIN;
5408-
case Intrinsic::amdgcn_ds_fmax:
5409-
return AMDGPU::G_ATOMICRMW_FMAX;
5410-
default:
5411-
llvm_unreachable("not a DS FP intrinsic");
5412-
}
5413-
}
5414-
5415-
bool AMDGPULegalizerInfo::legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
5416-
MachineInstr &MI,
5417-
Intrinsic::ID IID) const {
5418-
GISelChangeObserver &Observer = Helper.Observer;
5419-
Observer.changingInstr(MI);
5420-
5421-
MI.setDesc(ST.getInstrInfo()->get(getDSFPAtomicOpcode(IID)));
5422-
5423-
// The remaining operands were used to set fields in the MemOperand on
5424-
// construction.
5425-
for (int I = 6; I > 3; --I)
5426-
MI.removeOperand(I);
5427-
5428-
MI.removeOperand(1); // Remove the intrinsic ID.
5429-
Observer.changedInstr(MI);
5430-
return true;
5431-
}
5432-
54335404
// TODO: Fix pointer type handling
54345405
bool AMDGPULegalizerInfo::legalizeLaneOp(LegalizerHelper &Helper,
54355406
MachineInstr &MI,
@@ -7451,9 +7422,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
74517422
return legalizeBufferAtomic(MI, B, IntrID);
74527423
case Intrinsic::amdgcn_rsq_clamp:
74537424
return legalizeRsqClampIntrinsic(MI, MRI, B);
7454-
case Intrinsic::amdgcn_ds_fmin:
7455-
case Intrinsic::amdgcn_ds_fmax:
7456-
return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);
74577425
case Intrinsic::amdgcn_image_bvh_intersect_ray:
74587426
return legalizeBVHIntrinsic(MI, B);
74597427
case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,6 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
175175
bool legalizeRsqClampIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
176176
MachineIRBuilder &B) const;
177177

178-
bool legalizeDSAtomicFPIntrinsic(LegalizerHelper &Helper,
179-
MachineInstr &MI, Intrinsic::ID IID) const;
180-
181178
bool getImplicitArgPtr(Register DstReg, MachineRegisterInfo &MRI,
182179
MachineIRBuilder &B) const;
183180

llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,6 @@ def : SourceOfDivergence<int_amdgcn_flat_atomic_fmin_num>;
252252
def : SourceOfDivergence<int_amdgcn_flat_atomic_fmax_num>;
253253
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd_v2bf16>;
254254
def : SourceOfDivergence<int_amdgcn_flat_atomic_fadd_v2bf16>;
255-
def : SourceOfDivergence<int_amdgcn_ds_fmin>;
256-
def : SourceOfDivergence<int_amdgcn_ds_fmax>;
257255
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_swap>;
258256
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_add>;
259257
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_sub>;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -501,9 +501,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
501501
MemIntrinsicInfo &Info) const {
502502
switch (Inst->getIntrinsicID()) {
503503
case Intrinsic::amdgcn_ds_ordered_add:
504-
case Intrinsic::amdgcn_ds_ordered_swap:
505-
case Intrinsic::amdgcn_ds_fmin:
506-
case Intrinsic::amdgcn_ds_fmax: {
504+
case Intrinsic::amdgcn_ds_ordered_swap: {
507505
auto *Ordering = dyn_cast<ConstantInt>(Inst->getArgOperand(2));
508506
auto *Volatile = dyn_cast<ConstantInt>(Inst->getArgOperand(4));
509507
if (!Ordering || !Volatile)
@@ -1018,8 +1016,6 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
10181016
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
10191017
Intrinsic::ID IID) const {
10201018
switch (IID) {
1021-
case Intrinsic::amdgcn_ds_fmin:
1022-
case Intrinsic::amdgcn_ds_fmax:
10231019
case Intrinsic::amdgcn_is_shared:
10241020
case Intrinsic::amdgcn_is_private:
10251021
case Intrinsic::amdgcn_flat_atomic_fadd:
@@ -1039,20 +1035,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
10391035
Value *NewV) const {
10401036
auto IntrID = II->getIntrinsicID();
10411037
switch (IntrID) {
1042-
case Intrinsic::amdgcn_ds_fmin:
1043-
case Intrinsic::amdgcn_ds_fmax: {
1044-
const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4));
1045-
if (!IsVolatile->isZero())
1046-
return nullptr;
1047-
Module *M = II->getParent()->getParent()->getParent();
1048-
Type *DestTy = II->getType();
1049-
Type *SrcTy = NewV->getType();
1050-
Function *NewDecl =
1051-
Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy});
1052-
II->setArgOperand(0, NewV);
1053-
II->setCalledFunction(NewDecl);
1054-
return II;
1055-
}
10561038
case Intrinsic::amdgcn_is_shared:
10571039
case Intrinsic::amdgcn_is_private: {
10581040
unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ?

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1279,9 +1279,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
12791279

12801280
switch (IntrID) {
12811281
case Intrinsic::amdgcn_ds_ordered_add:
1282-
case Intrinsic::amdgcn_ds_ordered_swap:
1283-
case Intrinsic::amdgcn_ds_fmin:
1284-
case Intrinsic::amdgcn_ds_fmax: {
1282+
case Intrinsic::amdgcn_ds_ordered_swap: {
12851283
Info.opc = ISD::INTRINSIC_W_CHAIN;
12861284
Info.memVT = MVT::getVT(CI.getType());
12871285
Info.ptrVal = CI.getOperand(0);
@@ -1450,8 +1448,6 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
14501448
case Intrinsic::amdgcn_atomic_cond_sub_u32:
14511449
case Intrinsic::amdgcn_ds_append:
14521450
case Intrinsic::amdgcn_ds_consume:
1453-
case Intrinsic::amdgcn_ds_fmax:
1454-
case Intrinsic::amdgcn_ds_fmin:
14551451
case Intrinsic::amdgcn_ds_ordered_add:
14561452
case Intrinsic::amdgcn_ds_ordered_swap:
14571453
case Intrinsic::amdgcn_flat_atomic_fadd:
@@ -8899,15 +8895,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
88998895
M->getVTList(), Ops, M->getMemoryVT(),
89008896
M->getMemOperand());
89018897
}
8902-
case Intrinsic::amdgcn_ds_fmin:
8903-
case Intrinsic::amdgcn_ds_fmax: {
8904-
MemSDNode *M = cast<MemSDNode>(Op);
8905-
unsigned Opc = IntrID == Intrinsic::amdgcn_ds_fmin ? ISD::ATOMIC_LOAD_FMIN
8906-
: ISD::ATOMIC_LOAD_FMAX;
8907-
return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(), M->getOperand(0),
8908-
M->getOperand(2), M->getOperand(3),
8909-
M->getMemOperand());
8910-
}
89118898
case Intrinsic::amdgcn_raw_buffer_load:
89128899
case Intrinsic::amdgcn_raw_ptr_buffer_load:
89138900
case Intrinsic::amdgcn_raw_buffer_load_format:

llvm/test/Bitcode/amdgcn-atomic.ll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,4 +248,56 @@ define <2 x i16> @upgrade_amdgcn_ds_fadd_v2bf16__missing_args_as_i16(ptr addrspa
248248
ret <2 x i16> %result0
249249
}
250250

251+
declare float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
252+
declare double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) nocapture, double, i32 immarg, i32 immarg, i1 immarg)
253+
254+
define float @upgrade_amdgcn_ds_fmin_f32(ptr addrspace(3) %ptr, float %val) {
255+
; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
256+
%result0 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
257+
258+
; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
259+
%result1 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true)
260+
261+
; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
262+
%result2 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 43, i32 3, i1 false)
263+
264+
; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") acquire, align 4
265+
%result3 = call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %ptr, float %val, i32 4, i32 2, i1 false)
266+
267+
ret float %result3
268+
}
269+
270+
define double @upgrade_amdgcn_ds_fmin_f64(ptr addrspace(3) %ptr, double %val) {
271+
; CHECK: atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
272+
%result0 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 false)
273+
274+
; CHECK: = atomicrmw volatile fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
275+
%result1 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 0, i32 0, i1 true)
276+
277+
; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") seq_cst, align 8
278+
%result2 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 43, i32 3, i1 false)
279+
280+
; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, double %val syncscope("agent") acquire, align 8
281+
%result3 = call double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) %ptr, double %val, i32 4, i32 2, i1 false)
282+
283+
ret double %result3
284+
}
285+
286+
declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
287+
288+
define float @upgrade_amdgcn_ds_fmin_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
289+
; CHECK: = atomicrmw fmin ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
290+
291+
%result0 = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
292+
ret float %result0
293+
}
294+
295+
declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg)
296+
297+
define float @upgrade_amdgcn_ds_fmax_f32_no_suffix(ptr addrspace(3) %ptr, float %val) {
298+
; CHECK: = atomicrmw fmax ptr addrspace(3) %ptr, float %val syncscope("agent") seq_cst, align 4
299+
%result0 = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false)
300+
ret float %result0
301+
}
302+
251303
attributes #0 = { argmemonly nounwind willreturn }

0 commit comments

Comments
 (0)