Skip to content

Commit 7927bcd

Browse files
authored
AMDGPU: Do not bitcast atomicrmw in IR (#90045)
This is the first step toward eliminating shouldCastAtomicRMWIInIR. That hook and the other atomic expand casting hooks should be removed, because they add duplicate legalization machinery and interfaces. This kind of legalization is already what codegen is supposed to do, and what it already does for the promotion case. In the case of atomicrmw xchg, there seems to be some benefit to having the bitcasts moved outside of the cmpxchg loop on targets with separate int and FP registers, which we should be able to deal with by directly checking for the legality of the underlying operation. The casting path was also losing metadata when it recreated the instruction.
1 parent 5c51165 commit 7927bcd

File tree

9 files changed

+126
-90
lines changed

9 files changed

+126
-90
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -909,9 +909,10 @@ void AtomicExpandImpl::expandPartwordAtomicRMW(
909909
Value *ValOperand_Shifted = nullptr;
910910
if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
911911
Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
912+
Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
912913
ValOperand_Shifted =
913-
Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
914-
PMV.ShiftAmt, "ValOperand_Shifted");
914+
Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
915+
"ValOperand_Shifted");
915916
}
916917

917918
auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5988,6 +5988,13 @@ AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
59885988
case AtomicRMWInst::FMax:
59895989
case AtomicRMWInst::FMin:
59905990
return AtomicExpansionKind::CmpXChg;
5991+
case AtomicRMWInst::Xchg: {
5992+
const DataLayout &DL = RMW->getFunction()->getParent()->getDataLayout();
5993+
unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
5994+
if (ValSize == 32 || ValSize == 64)
5995+
return AtomicExpansionKind::None;
5996+
return AtomicExpansionKind::CmpXChg;
5997+
}
59915998
default: {
59925999
if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
59936000
unsigned Size = IntTy->getBitWidth();

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ class AMDGPUTargetLowering : public TargetLowering {
236236
return AtomicExpansionKind::None;
237237
}
238238

239+
AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override {
240+
return AtomicExpansionKind::None;
241+
}
242+
239243
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
240244
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
241245

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-agent.ll

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
define float @test_atomicrmw_xchg_f32_global_agent(ptr addrspace(1) %ptr, float %value) {
1717
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent(
1818
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
20-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] syncscope("agent") seq_cst, align 4
21-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
19+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4
2220
; COMMON-NEXT: ret float [[RES]]
2321
;
2422
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst
@@ -29,9 +27,7 @@ define float @test_atomicrmw_xchg_f32_global_agent(ptr addrspace(1) %ptr, float
2927
define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
3028
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory(
3129
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
32-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
33-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] syncscope("agent") seq_cst, align 4
34-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
30+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
3531
; COMMON-NEXT: ret float [[RES]]
3632
;
3733
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
@@ -42,9 +38,7 @@ define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memor
4238
define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
4339
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(
4440
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
45-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
46-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] syncscope("agent") seq_cst, align 4
47-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
41+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4842
; COMMON-NEXT: ret float [[RES]]
4943
;
5044
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
@@ -55,9 +49,7 @@ define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_remote_memory(ptr
5549
define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
5650
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
5751
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
58-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
59-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] syncscope("agent") seq_cst, align 4
60-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
52+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
6153
; COMMON-NEXT: ret float [[RES]]
6254
;
6355
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@@ -268,7 +260,7 @@ define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memor
268260
;
269261
; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
270262
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
271-
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
263+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
272264
; GFX940-NEXT: ret float [[RES]]
273265
;
274266
; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_agent__amdgpu_no_fine_grained_memory(
@@ -3713,5 +3705,19 @@ attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" }
37133705

37143706
!0 = !{}
37153707
;.
3708+
; GFX803: [[META0]] = !{}
3709+
;.
3710+
; GFX906: [[META0]] = !{}
3711+
;.
3712+
; GFX908: [[META0]] = !{}
3713+
;.
3714+
; GFX90A: [[META0]] = !{}
3715+
;.
37163716
; GFX940: [[META0]] = !{}
37173717
;.
3718+
; GFX10: [[META0]] = !{}
3719+
;.
3720+
; GFX11: [[META0]] = !{}
3721+
;.
3722+
; GFX12: [[META0]] = !{}
3723+
;.

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f32-system.ll

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
define float @test_atomicrmw_xchg_f32_global_system(ptr addrspace(1) %ptr, float %value) {
1717
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system(
1818
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
20-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] seq_cst, align 4
21-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
19+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4
2220
; COMMON-NEXT: ret float [[RES]]
2321
;
2422
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst
@@ -29,9 +27,7 @@ define float @test_atomicrmw_xchg_f32_global_system(ptr addrspace(1) %ptr, float
2927
define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %value) {
3028
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory(
3129
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
32-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
33-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] seq_cst, align 4
34-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
30+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
3531
; COMMON-NEXT: ret float [[RES]]
3632
;
3733
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0
@@ -42,9 +38,7 @@ define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memo
4238
define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
4339
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_remote_memory(
4440
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
45-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
46-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] seq_cst, align 4
47-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
41+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
4842
; COMMON-NEXT: ret float [[RES]]
4943
;
5044
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.remote.memory !0
@@ -55,9 +49,7 @@ define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_remote_memory(ptr
5549
define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, float %value) {
5650
; COMMON-LABEL: define float @test_atomicrmw_xchg_f32_global_system__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
5751
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
58-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast float [[VALUE]] to i32
59-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i32 [[TMP1]] seq_cst, align 4
60-
; COMMON-NEXT: [[RES:%.*]] = bitcast i32 [[TMP2]] to float
52+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
6153
; COMMON-NEXT: ret float [[RES]]
6254
;
6355
%res = atomicrmw xchg ptr addrspace(1) %ptr, float %value seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@@ -268,7 +260,7 @@ define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memo
268260
;
269261
; GFX940-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory(
270262
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], float [[VALUE:%.*]]) #[[ATTR0]] {
271-
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
263+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], float [[VALUE]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
272264
; GFX940-NEXT: ret float [[RES]]
273265
;
274266
; GFX10-LABEL: define float @test_atomicrmw_fadd_f32_global_system__amdgpu_no_fine_grained_memory(
@@ -3713,5 +3705,19 @@ attributes #1 = { "denormal-fp-mode-f32"="dynamic,dynamic" }
37133705

37143706
!0 = !{}
37153707
;.
3708+
; GFX803: [[META0]] = !{}
3709+
;.
3710+
; GFX906: [[META0]] = !{}
3711+
;.
3712+
; GFX908: [[META0]] = !{}
3713+
;.
3714+
; GFX90A: [[META0]] = !{}
3715+
;.
37163716
; GFX940: [[META0]] = !{}
37173717
;.
3718+
; GFX10: [[META0]] = !{}
3719+
;.
3720+
; GFX11: [[META0]] = !{}
3721+
;.
3722+
; GFX12: [[META0]] = !{}
3723+
;.

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-f64-agent.ll

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
define double @test_atomicrmw_xchg_f64_global_agent(ptr addrspace(1) %ptr, double %value) {
1717
; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent(
1818
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
19-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast double [[VALUE]] to i64
20-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[TMP1]] syncscope("agent") seq_cst, align 8
21-
; COMMON-NEXT: [[RES:%.*]] = bitcast i64 [[TMP2]] to double
19+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8
2220
; COMMON-NEXT: ret double [[RES]]
2321
;
2422
%res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst
@@ -29,9 +27,7 @@ define double @test_atomicrmw_xchg_f64_global_agent(ptr addrspace(1) %ptr, doubl
2927
define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %value) {
3028
; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory(
3129
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
32-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast double [[VALUE]] to i64
33-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[TMP1]] syncscope("agent") seq_cst, align 8
34-
; COMMON-NEXT: [[RES:%.*]] = bitcast i64 [[TMP2]] to double
30+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
3531
; COMMON-NEXT: ret double [[RES]]
3632
;
3733
%res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
@@ -42,9 +38,7 @@ define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memo
4238
define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
4339
; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory(
4440
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
45-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast double [[VALUE]] to i64
46-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[TMP1]] syncscope("agent") seq_cst, align 8
47-
; COMMON-NEXT: [[RES:%.*]] = bitcast i64 [[TMP2]] to double
41+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.remote.memory [[META0]]
4842
; COMMON-NEXT: ret double [[RES]]
4943
;
5044
%res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
@@ -55,9 +49,7 @@ define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_remote_memory(ptr
5549
define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, double %value) {
5650
; COMMON-LABEL: define double @test_atomicrmw_xchg_f64_global_agent__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory(
5751
; COMMON-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
58-
; COMMON-NEXT: [[TMP1:%.*]] = bitcast double [[VALUE]] to i64
59-
; COMMON-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], i64 [[TMP1]] syncscope("agent") seq_cst, align 8
60-
; COMMON-NEXT: [[RES:%.*]] = bitcast i64 [[TMP2]] to double
52+
; COMMON-NEXT: [[RES:%.*]] = atomicrmw xchg ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.no.remote.memory [[META0]]
6153
; COMMON-NEXT: ret double [[RES]]
6254
;
6355
%res = atomicrmw xchg ptr addrspace(1) %ptr, double %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0
@@ -268,7 +260,7 @@ define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memo
268260
;
269261
; GFX940-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
270262
; GFX940-SAME: ptr addrspace(1) [[PTR:%.*]], double [[VALUE:%.*]]) #[[ATTR0]] {
271-
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]]
263+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR]], double [[VALUE]] syncscope("agent") seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META0]]
272264
; GFX940-NEXT: ret double [[RES]]
273265
;
274266
; GFX10-LABEL: define double @test_atomicrmw_fadd_f64_global_agent__amdgpu_no_fine_grained_memory(
@@ -1681,5 +1673,19 @@ attributes #1 = { "denormal-fp-mode"="dynamic,dynamic" }
16811673

16821674
!0 = !{}
16831675
;.
1676+
; GFX803: [[META0]] = !{}
1677+
;.
1678+
; GFX906: [[META0]] = !{}
1679+
;.
1680+
; GFX908: [[META0]] = !{}
1681+
;.
1682+
; GFX90A: [[META0]] = !{}
1683+
;.
16841684
; GFX940: [[META0]] = !{}
16851685
;.
1686+
; GFX10: [[META0]] = !{}
1687+
;.
1688+
; GFX11: [[META0]] = !{}
1689+
;.
1690+
; GFX12: [[META0]] = !{}
1691+
;.

0 commit comments

Comments
 (0)