Skip to content

Commit bc3284d

Browse files
committed
AtomicExpand: Preserve metadata when expanding partword RMW
This will be important for AMDGPU in a future patch.
1 parent 7927bcd commit bc3284d

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,10 @@ AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
965965
AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
966966
Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
967967
AI->getOrdering(), AI->getSyncScopeID());
968-
// TODO: Preserve metadata
968+
969+
// TODO: Do we need to drop noundef? We widened the operation and could be
970+
// loading undefined bits.
971+
NewAI->copyMetadata(*AI);
969972

970973
Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
971974
AI->replaceAllUsesWith(FinalOldResult);

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_preserve_md(ptr addrspace(1) %pt
189189
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
190190
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
191191
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
192-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
192+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0:![0-9]+]], !some.unknown.md [[META0]]
193193
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
194194
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
195195
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -203,7 +203,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_md(ptr addrspace
203203
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_md(
204204
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
205205
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
206-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
206+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noundef [[META0]], !some.unknown.md [[META0]]
207207
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
208208
; CHECK-NEXT: ret i16 [[EXTRACTED]]
209209
;
@@ -223,7 +223,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr add
223223
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
224224
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
225225
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
226-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
226+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
227227
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
228228
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
229229
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -236,7 +236,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
236236
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
237237
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
238238
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
239-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
239+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META0]]
240240
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
241241
; CHECK-NEXT: ret i16 [[EXTRACTED]]
242242
;
@@ -256,7 +256,7 @@ define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(p
256256
; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
257257
; CHECK-NEXT: [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
258258
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
259-
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
259+
; CHECK-NEXT: [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
260260
; CHECK-NEXT: [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
261261
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
262262
; CHECK-NEXT: ret i16 [[EXTRACTED]]
@@ -269,7 +269,7 @@ define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_m
269269
; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(
270270
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
271271
; CHECK-NEXT: [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
272-
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
272+
; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0]]
273273
; CHECK-NEXT: [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
274274
; CHECK-NEXT: ret i16 [[EXTRACTED]]
275275
;

0 commit comments

Comments
 (0)