Skip to content

Commit 8b7041a

Browse files
committed
AMDGPU/GFX10: Apply the VMEM-to-scalar-write hazard also to writes to EXEC
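Summary: The VMEM-to-scalar-write hazard check in GCNHazardRecognizer::fixVMEMtoScalarWriteHazards no longer skips register uses that are implicit reads of EXEC, so a V_NOP is now also inserted before a scalar write to EXEC (see the updated test expectations). Change-Id: I854fbf7d48e937bef9f8f3f5d0c8aeb970652630. Reviewers: rampitec, mareko. Subscribers: arsenm, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits. Tags: #llvm. Differential Revision: https://reviews.llvm.org/D64807. Change-Id: I4405b3a7f84186acea5a78d291bff71056e745fc. llvm-svn: 366314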
1 parent a256b8b commit 8b7041a

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -920,7 +920,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
920920

921921
for (const MachineOperand &Def : MI->defs()) {
922922
MachineOperand *Op = I->findRegisterUseOperand(Def.getReg(), false, TRI);
923-
if (!Op || (Op->isImplicit() && Op->getReg() == AMDGPU::EXEC))
923+
if (!Op)
924924
continue;
925925
return true;
926926
}

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) {
1414
; GCN-NEXT: v_mov_b32_e32 v0, 0
1515
; GCN-NEXT: global_store_dword v[0:1], v0, off
1616
; GCN-NEXT: BB0_2: ; %bb
17+
; GCN-NEXT: v_nop
1718
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s0
1819
; GCN-NEXT: v_mov_b32_e32 v0, 0
1920
; GCN-NEXT: global_store_dword v[0:1], v0, off

llvm/test/CodeGen/AMDGPU/vmem-to-salu-hazard.mir

+14
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ body: |
9292
...
9393
# GCN-LABEL: name: vmem_write_exec_impread
9494
# GCN: BUFFER_LOAD_DWORD_OFFEN
95+
# GCN: V_NOP
9596
# GCN-NEXT: S_MOV_B64
9697
---
9798
name: vmem_write_exec_impread
@@ -208,3 +209,16 @@ body: |
208209
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
209210
S_BRANCH %bb.0
210211
...
212+
# GCN-LABEL: name: ds_write_exec
213+
# GCN: DS_WRITE_B32_gfx9
214+
# GCN-NEXT: V_NOP
215+
# GCN-NEXT: S_MOV_B32
216+
---
217+
name: ds_write_exec
218+
body: |
219+
bb.0:
220+
$vgpr0 = IMPLICIT_DEF
221+
$vgpr1 = IMPLICIT_DEF
222+
DS_WRITE_B32_gfx9 $vgpr0, $vgpr1, 0, 0, implicit $exec
223+
$exec_lo = S_MOV_B32 -1
224+
...

llvm/test/CodeGen/AMDGPU/wave32.ll

+2
Original file line numberDiff line numberDiff line change
@@ -1073,6 +1073,7 @@ declare void @external_void_func_void() #1
10731073
; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
10741074
; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}}
10751075
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
1076+
; GCN-NEXT: v_nop
10761077
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
10771078
; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC0]]
10781079

@@ -1095,6 +1096,7 @@ declare void @external_void_func_void() #1
10951096
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
10961097
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
10971098
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
1099+
; GCN-NEXT: v_nop
10981100
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
10991101
; GFX1032-NEXT: s_mov_b32 exec_lo, [[COPY_EXEC1]]
11001102
; GCN-NEXT: s_waitcnt vmcnt(0)

0 commit comments

Comments
 (0)