Skip to content

Commit 367d4d5

Browse files
committed
Only copy specific metadata
1 parent c58ee05 commit 367d4d5

File tree

2 files changed

+44
-15
lines changed

2 files changed

+44
-15
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "llvm/IR/InstIterator.h"
3838
#include "llvm/IR/Instruction.h"
3939
#include "llvm/IR/Instructions.h"
40+
#include "llvm/IR/MDBuilder.h"
4041
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
4142
#include "llvm/IR/Module.h"
4243
#include "llvm/IR/Type.h"
@@ -937,6 +938,36 @@ void AtomicExpandImpl::expandPartwordAtomicRMW(
937938
AI->eraseFromParent();
938939
}
939940

941+
/// Copy metadata that's safe to preserve when widening atomics.
942+
static void copyMetadataForAtomic(Instruction &Dest,
943+
const Instruction &Source) {
944+
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
945+
Source.getAllMetadata(MD);
946+
LLVMContext &Ctx = Dest.getContext();
947+
MDBuilder MDB(Ctx);
948+
949+
for (auto [ID, N] : MD) {
950+
switch (ID) {
951+
case LLVMContext::MD_dbg:
952+
case LLVMContext::MD_tbaa:
953+
case LLVMContext::MD_tbaa_struct:
954+
case LLVMContext::MD_alias_scope:
955+
case LLVMContext::MD_noalias:
956+
case LLVMContext::MD_access_group:
957+
case LLVMContext::MD_mmra:
958+
Dest.setMetadata(ID, N);
959+
break;
960+
default:
961+
if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
962+
Dest.setMetadata(ID, N);
963+
else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
964+
Dest.setMetadata(ID, N);
965+
966+
break;
967+
}
968+
}
969+
}
970+
940971
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
941972
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
942973
ReplacementIRBuilder Builder(AI, *DL);
@@ -966,9 +997,7 @@ AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
966997
Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
967998
AI->getOrdering(), AI->getSyncScopeID());
968999

969-
// TODO: Do we need to drop noundef? We widened the operation and could be
970-
// loading undefined bits.
971-
NewAI->copyMetadata(*AI);
1000+
copyMetadataForAtomic(*NewAI, *AI);
9721001

9731002
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
9741003
AI->replaceAllUsesWith(FinalOldResult);

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_drop_md(ptr addrspace(1) %ptr, i
189189
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
190190
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
191191
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
192-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0:![0-9]+]], !some.unknown.md [[META0]]
192+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
193193
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
194194
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
195195
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -203,7 +203,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_drop_md(ptr addrspace(1)
203203
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_drop_md(
204204
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
205205
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
206-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !some.unknown.md [[META0]]
206+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
207207
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
208208
; CHECK-NEXT: ret i16 [[EXTRACTED]]
209209
;
@@ -224,7 +224,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_preserve_mmra(ptr addrspace(1) %
224224
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
225225
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
226226
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
227-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !mmra [[META1:![0-9]+]]
227+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0:![0-9]+]]
228228
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
229229
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
230230
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -238,7 +238,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(ptr addrspa
238238
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(
239239
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
240240
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
241-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !mmra [[META1]]
241+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0]]
242242
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
243243
; CHECK-NEXT: ret i16 [[EXTRACTED]]
244244
;
@@ -250,7 +250,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(ptr
250250
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(
251251
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
252252
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
253-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
253+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !alias.scope [[META1:![0-9]+]]
254254
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
255255
; CHECK-NEXT: ret i16 [[EXTRACTED]]
256256
;
@@ -262,7 +262,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(ptr addr
262262
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(
263263
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
264264
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
265-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
265+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noalias [[META1]]
266266
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
267267
; CHECK-NEXT: ret i16 [[EXTRACTED]]
268268
;
@@ -274,7 +274,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(ptr
274274
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(
275275
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
276276
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
277-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
277+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]]
278278
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
279279
; CHECK-NEXT: ret i16 [[EXTRACTED]]
280280
;
@@ -286,7 +286,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(ptr addrspa
286286
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(
287287
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
288288
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
289-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
289+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa [[TBAA5:![0-9]+]]
290290
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
291291
; CHECK-NEXT: ret i16 [[EXTRACTED]]
292292
;
@@ -306,7 +306,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr add
306306
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
307307
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
308308
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
309-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
309+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8:![0-9]+]]
310310
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
311311
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
312312
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -319,7 +319,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
319319
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
320320
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
321321
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
322-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
322+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8]]
323323
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
324324
; CHECK-NEXT: ret i16 [[EXTRACTED]]
325325
;
@@ -339,7 +339,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(p
339339
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
340340
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
341341
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
342-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
342+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
343343
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
344344
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
345345
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -352,7 +352,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_m
352352
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(
353353
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
354354
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
355-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
355+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
356356
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
357357
; CHECK-NEXT: ret i16 [[EXTRACTED]]
358358
;

0 commit comments

Comments
 (0)