Skip to content

Commit 199b524

Browse files
committed
AMDGPU: Add amdgpu.no.remote.memory when upgrading old atomic intrinsics
This should replicate the old intrinsic behavior better when codegen of the raw instruction will require metadata in the future.
1 parent 557bf38 commit 199b524

File tree

2 files changed

+22
-20
lines changed

2 files changed

+22
-20
lines changed

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2358,6 +2358,8 @@ static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
23582358
SyncScope::ID SSID = F->getContext().getOrInsertSyncScopeID("agent");
23592359
AtomicRMWInst *RMW =
23602360
Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
2361+
RMW->setMetadata("amdgpu.no.remote.memory",
2362+
MDNode::get(F->getContext(), {}));
23612363

23622364
if (!VolatileArg || !VolatileArg->isZero())
23632365
RMW->setVolatile(true);

llvm/test/Bitcode/amdgcn-atomic.ll

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22

33

44
define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
5-
; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
5+
; CHECK: atomicrmw uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
66
%result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false)
77

8-
; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4
8+
; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
99
%result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false)
1010

11-
; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4
11+
; CHECK: atomicrmw uinc_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
1212
%result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr3, i32 46, i32 0, i32 0, i1 false)
1313

1414
; CHECK: atomicrmw uinc_wrap ptr %ptr0, i64 48 syncscope("agent") seq_cst, align 8
@@ -26,13 +26,13 @@ define void @atomic_inc(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr
2626
}
2727

2828
define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
29-
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
29+
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
3030
%result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 0, i1 false)
3131

32-
; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4
32+
; CHECK: atomicrmw udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
3333
%result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 false)
3434

35-
; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4
35+
; CHECK: atomicrmw udec_wrap ptr addrspace(3) %ptr3, i32 46 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
3636
%result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr3, i32 46, i32 0, i32 0, i1 false)
3737

3838
; CHECK: atomicrmw udec_wrap ptr %ptr0, i64 48 syncscope("agent") seq_cst, align 8
@@ -51,49 +51,49 @@ define void @atomic_dec(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr
5151

5252
; Test some invalid ordering handling
5353
define void @ordering(ptr %ptr0, ptr addrspace(1) %ptr1, ptr addrspace(3) %ptr3) {
54-
; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
54+
; CHECK: atomicrmw volatile uinc_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
5555
%result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr0, i32 42, i32 -1, i32 0, i1 true)
5656

57-
; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4
57+
; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
5858
%result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 0, i32 0, i1 true)
5959

60-
; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4
60+
; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
6161
%result2 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 1, i32 0, i1 false)
6262

63-
; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4
63+
; CHECK: atomicrmw volatile uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") monotonic, align 4, !amdgpu.no.remote.memory !0
6464
%result3 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 2, i32 0, i1 true)
6565

66-
; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4
66+
; CHECK: atomicrmw uinc_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
6767
%result4 = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 false)
6868

69-
; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
69+
; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
7070
%result5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 4, i1 true)
7171

72-
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
72+
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
7373
%result6 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 5, i1 false)
7474

75-
; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
75+
; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
7676
%result7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 6, i1 true)
7777

78-
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
78+
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
7979
%result8 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 7, i1 false)
8080

81-
; CHECK:= atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
81+
; CHECK:= atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
8282
%result9 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 0, i32 8, i1 true)
8383

84-
; CHECK:= atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4
84+
; CHECK:= atomicrmw volatile udec_wrap ptr addrspace(1) %ptr1, i32 43 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
8585
%result10 = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr1, i32 43, i32 3, i32 0, i1 true)
8686
ret void
8787
}
8888

8989
define void @immarg_violations(ptr %ptr0, i32 %val32, i1 %val1) {
90-
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4
90+
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
9191
%result0 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 %val32, i32 0, i1 false)
9292

93-
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4
93+
; CHECK: atomicrmw udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !amdgpu.no.remote.memory !0
9494
%result1 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 %val32, i1 false)
9595

96-
; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4
96+
; CHECK: atomicrmw volatile udec_wrap ptr %ptr0, i32 42 syncscope("agent") monotonic, align 4, !amdgpu.no.remote.memory !0
9797
%result2 = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr0, i32 42, i32 2, i32 0, i1 %val1)
9898
ret void
9999
}

0 commit comments

Comments
 (0)