Skip to content

Commit 4a77ea0

Browse files
committed
Allow th=3 for SCOPE_SYS
1 parent 1f588aa commit 4a77ea0

File tree

4 files changed

+4
-21
lines changed

4 files changed

+4
-21
lines changed

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -602,10 +602,6 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
602602
// MI. \returns true if \p MI is modified, false otherwise.
603603
bool setScope(const MachineBasicBlock::iterator MI,
604604
AMDGPU::CPol::CPol Value) const;
605-
// Checks if CPol operand is present in instruction \p MI and if current Scope
606-
// policy is same as \p Value.
607-
bool isScope(const MachineBasicBlock::iterator MI,
608-
AMDGPU::CPol::CPol Value) const;
609605

610606
// Stores with system scope (SCOPE_SYS) need to wait for:
611607
// - loads or atomics(returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
@@ -2227,15 +2223,6 @@ bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
22272223
return false;
22282224
}
22292225

2230-
bool SIGfx12CacheControl::isScope(const MachineBasicBlock::iterator MI,
2231-
AMDGPU::CPol::CPol Value) const {
2232-
MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
2233-
if (!CPol)
2234-
return false;
2235-
2236-
return (CPol->getImm() & AMDGPU::CPol::SCOPE) == Value;
2237-
}
2238-
22392226
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
22402227
const MachineBasicBlock::iterator MI) const {
22412228
// TODO: implement flag for frontend to give us a hint not to insert waits.
@@ -2445,11 +2432,7 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
24452432
bool SIGfx12CacheControl::enableLastUse(MachineInstr &MI,
24462433
bool IsLastUse) const {
24472434
assert(MI.mayLoad() && !MI.mayStore());
2448-
2449-
if (IsLastUse && !isScope(MI, AMDGPU::CPol::SCOPE_SYS))
2450-
return setTH(MI, AMDGPU::CPol::TH_LU);
2451-
2452-
return false;
2435+
return IsLastUse ? setTH(MI, AMDGPU::CPol::TH_LU) : false;
24532436
}
24542437

24552438
bool SIGfx12CacheControl::expandSystemScopeStore(

llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ define amdgpu_kernel void @flat_last_use_volatile_load(ptr %in, ptr %out) {
4747
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
4848
; GFX12-NEXT: s_wait_kmcnt 0x0
4949
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
50-
; GFX12-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
50+
; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
5151
; GFX12-NEXT: s_wait_loadcnt 0x0
5252
; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
5353
; GFX12-NEXT: s_wait_dscnt 0x0

llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ define amdgpu_kernel void @global_last_use_volatile_load(ptr addrspace(1) %in, p
4646
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
4747
; GFX12-NEXT: v_mov_b32_e32 v0, 0
4848
; GFX12-NEXT: s_wait_kmcnt 0x0
49-
; GFX12-NEXT: global_load_b32 v1, v0, s[0:1] scope:SCOPE_SYS
49+
; GFX12-NEXT: global_load_b32 v1, v0, s[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
5050
; GFX12-NEXT: s_wait_loadcnt 0x0
5151
; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
5252
; GFX12-NEXT: s_nop 0

llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ define amdgpu_kernel void @private_nontemporal_volatile_load(ptr addrspace(5) %i
5151
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
5252
; GFX12-NEXT: v_mov_b32_e32 v1, 0
5353
; GFX12-NEXT: s_wait_kmcnt 0x0
54-
; GFX12-NEXT: scratch_load_b32 v0, off, s2 scope:SCOPE_SYS
54+
; GFX12-NEXT: scratch_load_b32 v0, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS
5555
; GFX12-NEXT: s_wait_loadcnt 0x0
5656
; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
5757
; GFX12-NEXT: s_nop 0

0 commit comments

Comments
 (0)