Skip to content

Commit d811708

Browse files
authored
AtomicExpand: Preserve metadata when expanding partword RMW (#89769)
This will be important for AMDGPU in a future patch.
1 parent 7aa382f commit d811708

File tree

2 files changed

+135
-11
lines changed

2 files changed

+135
-11
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "llvm/IR/InstIterator.h"
3838
#include "llvm/IR/Instruction.h"
3939
#include "llvm/IR/Instructions.h"
40+
#include "llvm/IR/MDBuilder.h"
4041
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
4142
#include "llvm/IR/Module.h"
4243
#include "llvm/IR/Type.h"
@@ -937,6 +938,36 @@ void AtomicExpandImpl::expandPartwordAtomicRMW(
937938
AI->eraseFromParent();
938939
}
939940

941+
/// Copy metadata that's safe to preserve when widening atomics.
942+
static void copyMetadataForAtomic(Instruction &Dest,
943+
const Instruction &Source) {
944+
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
945+
Source.getAllMetadata(MD);
946+
LLVMContext &Ctx = Dest.getContext();
947+
MDBuilder MDB(Ctx);
948+
949+
for (auto [ID, N] : MD) {
950+
switch (ID) {
951+
case LLVMContext::MD_dbg:
952+
case LLVMContext::MD_tbaa:
953+
case LLVMContext::MD_tbaa_struct:
954+
case LLVMContext::MD_alias_scope:
955+
case LLVMContext::MD_noalias:
956+
case LLVMContext::MD_access_group:
957+
case LLVMContext::MD_mmra:
958+
Dest.setMetadata(ID, N);
959+
break;
960+
default:
961+
if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
962+
Dest.setMetadata(ID, N);
963+
else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
964+
Dest.setMetadata(ID, N);
965+
966+
break;
967+
}
968+
}
969+
}
970+
940971
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
941972
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
942973
ReplacementIRBuilder Builder(AI, *DL);
@@ -965,7 +996,8 @@ AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
965996
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
966997
Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
967998
AI->getOrdering(), AI->getSyncScopeID());
968-
// TODO: Preserve metadata
999+
1000+
copyMetadataForAtomic(*NewAI, *AI);
9691001

9701002
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
9711003
AI->replaceAllUsesWith(FinalOldResult);

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll

Lines changed: 102 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -176,9 +176,9 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4(ptr addrspace(1) %ptr, i1
176176
ret i16 %res
177177
}
178178

179-
; Preserve unknown metadata
180-
define i16 @test_atomicrmw_and_i16_global_agent_preserve_md(ptr addrspace(1) %ptr, i16 %value) {
181-
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_md(
179+
; Drop unknown metadata and noundef
180+
define i16 @test_atomicrmw_and_i16_global_agent_drop_md(ptr addrspace(1) %ptr, i16 %value) {
181+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_drop_md(
182182
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
183183
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
184184
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
@@ -198,9 +198,9 @@ define i16 @test_atomicrmw_and_i16_global_agent_preserve_md(ptr addrspace(1) %pt
198198
ret i16 %res
199199
}
200200

201-
; Preserve unknown metadata
202-
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_md(ptr addrspace(1) %ptr, i16 %value) {
203-
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_md(
201+
; Drop unknown metadata
202+
define i16 @test_atomicrmw_and_i16_global_agent_align4_drop_md(ptr addrspace(1) %ptr, i16 %value) {
203+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_drop_md(
204204
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
205205
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
206206
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
@@ -211,6 +211,89 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_md(ptr addrspace
211211
ret i16 %res
212212
}
213213

214+
; Drop noundef, preserve mmra
; The input atomicrmw carries both !noundef and !mmra and has no explicit
; alignment. The expected output operates on a ptrmask-aligned i32 word with
; the operand shifted into position, and lists only !mmra on the widened
; atomicrmw.
215+
define i16 @test_atomicrmw_and_i16_global_agent_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) {
216+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_mmra(
217+
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
218+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
219+
; CHECK-NEXT: [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
220+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
221+
; CHECK-NEXT: [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
222+
; CHECK-NEXT: [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
223+
; CHECK-NEXT: [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
224+
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
225+
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
226+
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
227+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0:![0-9]+]]
228+
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
229+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
230+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
231+
;
232+
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !mmra !1
233+
ret i16 %res
234+
}
235+
236+
; Drop noundef, preserve mmra
; Same metadata as the unaligned variant, but the input atomicrmw is align 4.
; The expected output uses the original pointer directly, fills the upper 16
; bits of the operand with ones, and lists only !mmra on the widened
; atomicrmw.
237+
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) {
238+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(
239+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
240+
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
241+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0]]
242+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
243+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
244+
;
245+
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !mmra !1
246+
ret i16 %res
247+
}
248+
249+
; Preserve alias.scope
; The align-4 i16 input carries !alias.scope; the expected widened i32
; atomicrmw lists the same kind of attachment.
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(ptr addrspace(1) %ptr, i16 %value) {
250+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(
251+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
252+
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
253+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !alias.scope [[META1:![0-9]+]]
254+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
255+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
256+
;
257+
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !alias.scope !2
258+
ret i16 %res
259+
}
260+
261+
; Preserve noalias
; The align-4 i16 input carries !noalias; the expected widened i32 atomicrmw
; lists the same kind of attachment.
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(ptr addrspace(1) %ptr, i16 %value) {
262+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(
263+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
264+
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
265+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noalias [[META1]]
266+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
267+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
268+
;
269+
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noalias !2
270+
ret i16 %res
271+
}
272+
273+
; Preserve tbaa.struct
; The align-4 i16 input carries !tbaa.struct; the expected widened i32
; atomicrmw lists the same kind of attachment.
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(ptr addrspace(1) %ptr, i16 %value) {
274+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(
275+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
276+
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
277+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]]
278+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
279+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
280+
;
281+
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa.struct !5
282+
ret i16 %res
283+
}
284+
285+
; Preserve tbaa
; The align-4 i16 input carries !tbaa; the expected widened i32 atomicrmw
; lists the same kind of attachment.
define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(ptr addrspace(1) %ptr, i16 %value) {
286+
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(
287+
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
288+
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
289+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa [[TBAA5:![0-9]+]]
290+
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
291+
; CHECK-NEXT: ret i16 [[EXTRACTED]]
292+
;
293+
%res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa !6
294+
ret i16 %res
295+
}
296+
214297
define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i16 %value) {
215298
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(
216299
; CHECK-NEXT: [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
@@ -223,7 +306,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr add
223306
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
224307
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
225308
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
226-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
309+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8:![0-9]+]]
227310
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
228311
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
229312
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -236,7 +319,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
236319
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
237320
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
238321
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
239-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
322+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8]]
240323
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
241324
; CHECK-NEXT: ret i16 [[EXTRACTED]]
242325
;
@@ -256,7 +339,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(p
256339
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
257340
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
258341
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
259-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
342+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
260343
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
261344
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
262345
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -269,7 +352,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_m
269352
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(
270353
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
271354
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
272-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
355+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
273356
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
274357
; CHECK-NEXT: ret i16 [[EXTRACTED]]
275358
;
@@ -1180,6 +1263,15 @@ define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %pt
11801263
}
11811264

11821265
!0 = !{}
1266+
!1 = !{!"foo", !"bar"}
1267+
!2 = !{!3}
1268+
!3 = distinct !{!3, !4}
1269+
!4 = distinct !{!4}
1270+
!5 = !{i64 0, i64 4, !1, i64 8, i64 4}
1271+
!6 = !{!7, !7, i64 0}
1272+
!7 = !{!"omnipotent char", !8, i64 0}
1273+
!8 = !{!"Simple C/C++ TBAA"}
1274+
11831275
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
11841276
; BASE: {{.*}}
11851277
; GCN: {{.*}}

0 commit comments

Comments
 (0)