Skip to content

Commit 4d80b08

Browse files
committed
AMDGPU: Preserve alignment when custom expanding atomicrmw
1 parent 66bd5d7 commit 4d80b08

File tree

2 files changed: 59 additions and 3 deletions.

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16657,6 +16657,7 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1665716657
Value *Val = AI->getValOperand();
1665816658
Type *ValTy = Val->getType();
1665916659
Value *Addr = AI->getPointerOperand();
16660+
Align Alignment = AI->getAlign();
1666016661

1666116662
auto CreateNewAtomicRMW = [AI](IRBuilder<> &Builder, Value *Addr,
1666216663
Value *Val) -> Value * {
@@ -16690,12 +16691,12 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
1669016691
Builder.SetInsertPoint(PrivateBB);
1669116692
Value *CastToPrivate = Builder.CreateAddrSpaceCast(
1669216693
Addr, PointerType::get(Ctx, AMDGPUAS::PRIVATE_ADDRESS));
16693-
Value *LoadedPrivate =
16694-
Builder.CreateLoad(ValTy, CastToPrivate, "loaded.private");
16694+
Value *LoadedPrivate = Builder.CreateAlignedLoad(ValTy, CastToPrivate,
16695+
Alignment, "loaded.private");
1669516696

1669616697
Value *NewVal = buildAtomicRMWValue(Op, Builder, LoadedPrivate, Val);
1669716698

16698-
Builder.CreateStore(NewVal, CastToPrivate);
16699+
Builder.CreateAlignedStore(NewVal, CastToPrivate, Alignment);
1669916700
Builder.CreateBr(PhiBB);
1670016701

1670116702
Builder.SetInsertPoint(GlobalBB);

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-rmw-fadd-flat-specialization.ll

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -294,4 +294,59 @@ define float @no_unsafe(ptr %addr, float %val) {
294294
ret float %res
295295
}
296296

297+
define float @flat_atomicrmw_fadd_f32__align32(ptr %addr, float %val) {
298+
; GFX908-LABEL: @flat_atomicrmw_fadd_f32__align32(
299+
; GFX908-NEXT: [[TMP1:%.*]] = load float, ptr [[ADDR:%.*]], align 32
300+
; GFX908-NEXT: br label [[ATOMICRMW_START:%.*]]
301+
; GFX908: atomicrmw.start:
302+
; GFX908-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
303+
; GFX908-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VAL:%.*]]
304+
; GFX908-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
305+
; GFX908-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
306+
; GFX908-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[ADDR]], i32 [[TMP3]], i32 [[TMP2]] seq_cst seq_cst, align 32
307+
; GFX908-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
308+
; GFX908-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
309+
; GFX908-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
310+
; GFX908-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
311+
; GFX908: atomicrmw.end:
312+
; GFX908-NEXT: ret float [[TMP5]]
313+
;
314+
; GFX90A-LABEL: @flat_atomicrmw_fadd_f32__align32(
315+
; GFX90A-NEXT: [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[ADDR:%.*]])
316+
; GFX90A-NEXT: br i1 [[IS_SHARED]], label [[ATOMICRMW_SHARED:%.*]], label [[ATOMICRMW_CHECK_PRIVATE:%.*]]
317+
; GFX90A: atomicrmw.shared:
318+
; GFX90A-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(3)
319+
; GFX90A-NEXT: [[TMP2:%.*]] = atomicrmw fadd ptr addrspace(3) [[TMP1]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
320+
; GFX90A-NEXT: br label [[ATOMICRMW_PHI:%.*]]
321+
; GFX90A: atomicrmw.check.private:
322+
; GFX90A-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[ADDR]])
323+
; GFX90A-NEXT: br i1 [[IS_PRIVATE]], label [[ATOMICRMW_PRIVATE:%.*]], label [[ATOMICRMW_GLOBAL:%.*]]
324+
; GFX90A: atomicrmw.private:
325+
; GFX90A-NEXT: [[TMP3:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
326+
; GFX90A-NEXT: [[LOADED_PRIVATE:%.*]] = load float, ptr addrspace(5) [[TMP3]], align 32
327+
; GFX90A-NEXT: [[NEW:%.*]] = fadd float [[LOADED_PRIVATE]], [[VAL]]
328+
; GFX90A-NEXT: store float [[NEW]], ptr addrspace(5) [[TMP3]], align 32
329+
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
330+
; GFX90A: atomicrmw.global:
331+
; GFX90A-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
332+
; GFX90A-NEXT: [[TMP5:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP4]], float [[VAL]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
333+
; GFX90A-NEXT: br label [[ATOMICRMW_PHI]]
334+
; GFX90A: atomicrmw.phi:
335+
; GFX90A-NEXT: [[RES:%.*]] = phi float [ [[TMP2]], [[ATOMICRMW_SHARED]] ], [ [[LOADED_PRIVATE]], [[ATOMICRMW_PRIVATE]] ], [ [[TMP5]], [[ATOMICRMW_GLOBAL]] ]
336+
; GFX90A-NEXT: br label [[ATOMICRMW_END:%.*]]
337+
; GFX90A: atomicrmw.end:
338+
; GFX90A-NEXT: ret float [[RES]]
339+
;
340+
; GFX940-LABEL: @flat_atomicrmw_fadd_f32__align32(
341+
; GFX940-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
342+
; GFX940-NEXT: ret float [[RES]]
343+
;
344+
; GFX1100-LABEL: @flat_atomicrmw_fadd_f32__align32(
345+
; GFX1100-NEXT: [[RES:%.*]] = atomicrmw fadd ptr [[ADDR:%.*]], float [[VAL:%.*]] seq_cst, align 32, !amdgpu.no.fine.grained.memory [[META0]], !amdgpu.ignore.denormal.mode [[META0]]
346+
; GFX1100-NEXT: ret float [[RES]]
347+
;
348+
%res = atomicrmw fadd ptr %addr, float %val seq_cst, align 32, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
349+
ret float %res
350+
}
351+
297352
!0 = !{}

0 commit comments

Comments (0)