Skip to content

Commit 2eb767c

Browse files
authored
AMDGPU: Scratch instructions are trivially disjoint from SMEM and buffer instructions (#65287)
Scratch instructions always access addrspace(5), which can only alias with flat accesses (and with itself). SMEM and buffer instructions can never reference that address space, so they are trivially disjoint from scratch instructions.
1 parent 994bdce commit 2eb767c

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3425,19 +3425,30 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
34253425
if (isMUBUF(MIb) || isMTBUF(MIb))
34263426
return checkInstOffsetsDoNotOverlap(MIa, MIb);
34273427

3428-
return !isFLAT(MIb) && !isSMRD(MIb);
3428+
if (isFLAT(MIb))
3429+
return isFLATScratch(MIb);
3430+
3431+
return !isSMRD(MIb);
34293432
}
34303433

34313434
if (isSMRD(MIa)) {
34323435
if (isSMRD(MIb))
34333436
return checkInstOffsetsDoNotOverlap(MIa, MIb);
34343437

3435-
return !isFLAT(MIb) && !isMUBUF(MIb) && !isMTBUF(MIb);
3438+
if (isFLAT(MIb))
3439+
return isFLATScratch(MIb);
3440+
3441+
return !isMUBUF(MIb) && !isMTBUF(MIb);
34363442
}
34373443

34383444
if (isFLAT(MIa)) {
3439-
if (isFLAT(MIb))
3445+
if (isFLAT(MIb)) {
3446+
if ((isFLATScratch(MIa) && isFLATGlobal(MIb)) ||
3447+
(isFLATGlobal(MIa) && isFLATScratch(MIb)))
3448+
return true;
3449+
34403450
return checkInstOffsetsDoNotOverlap(MIa, MIb);
3451+
}
34413452

34423453
return false;
34433454
}

llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,14 @@ define amdgpu_gfx void @example(<4 x i32> inreg %rsrc, ptr addrspace(5) %src, i3
55
; CHECK-LABEL: example:
66
; CHECK: ; %bb.0:
77
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; CHECK-NEXT: v_add_nc_u32_e32 v3, 4, v0
9+
; CHECK-NEXT: s_clause 0x1
810
; CHECK-NEXT: scratch_load_b32 v2, v0, off
9-
; CHECK-NEXT: v_add_nc_u32_e32 v0, 4, v0
11+
; CHECK-NEXT: scratch_load_b32 v3, v3, off
1012
; CHECK-NEXT: s_waitcnt vmcnt(0)
11-
; CHECK-NEXT: buffer_store_b32 v2, v1, s[4:7], 0 offen
12-
; CHECK-NEXT: scratch_load_b32 v0, v0, off
13-
; CHECK-NEXT: s_waitcnt vmcnt(0)
14-
; CHECK-NEXT: buffer_store_b32 v0, v1, s[4:7], 0 offen offset:4
13+
; CHECK-NEXT: buffer_store_b64 v[2:3], v1, s[4:7], 0 offen
1514
; CHECK-NEXT: s_setpc_b64 s[30:31]
16-
;
15+
1716
%x0 = load i32, ptr addrspace(5) %src
1817
call void @llvm.amdgcn.raw.buffer.store.i32(i32 %x0, <4 x i32> %rsrc, i32 %dst, i32 0, i32 0)
1918
%src1 = getelementptr i8, ptr addrspace(5) %src, i32 4

0 commit comments

Comments
 (0)