Skip to content

Commit c58ee05

Browse files
committed
AtomicExpand: Preserve metadata when expanding partword RMW
This will be important for AMDGPU in a future patch.
1 parent 0592560 commit c58ee05

File tree

2 files changed

+12
-9
lines changed

2 files changed

+12
-9
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,10 @@ AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
965965
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
966966
Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
967967
AI->getOrdering(), AI->getSyncScopeID());
968-
// TODO: Preserve metadata
968+
969+
// TODO: Do we need to drop noundef? We widened the operation and could be
970+
// loading undefined bits.
971+
NewAI->copyMetadata(*AI);
969972

970973
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
971974
AI->replaceAllUsesWith(FinalOldResult);

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_drop_md(ptr addrspace(1) %ptr, i
189189
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
190190
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
191191
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
192-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
192+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0:![0-9]+]], !some.unknown.md [[META0]]
193193
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
194194
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
195195
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -203,7 +203,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_drop_md(ptr addrspace(1)
203203
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_drop_md(
204204
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
205205
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
206-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
206+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !some.unknown.md [[META0]]
207207
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
208208
; CHECK-NEXT: ret i16 [[EXTRACTED]]
209209
;
@@ -224,7 +224,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_preserve_mmra(ptr addrspace(1) %
224224
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
225225
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
226226
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
227-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0:![0-9]+]]
227+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !mmra [[META1:![0-9]+]]
228228
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
229229
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
230230
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -238,7 +238,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(ptr addrspa
238238
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(
239239
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
240240
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
241-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0]]
241+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !mmra [[META1]]
242242
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
243243
; CHECK-NEXT: ret i16 [[EXTRACTED]]
244244
;
@@ -306,7 +306,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr add
306306
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
307307
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
308308
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
309-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
309+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
310310
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
311311
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
312312
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -319,7 +319,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
319319
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
320320
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
321321
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
322-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
322+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
323323
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
324324
; CHECK-NEXT: ret i16 [[EXTRACTED]]
325325
;
@@ -339,7 +339,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(p
339339
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
340340
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
341341
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
342-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
342+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
343343
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
344344
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
345345
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -352,7 +352,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_m
352352
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(
353353
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
354354
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
355-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
355+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
356356
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
357357
; CHECK-NEXT: ret i16 [[EXTRACTED]]
358358
;

0 commit comments

Comments
 (0)