Skip to content

Commit 74aa4cc

Browse files
committed
[AMDGPU] Fix machine verification failure from INIT_EXEC lowering
Fix a machine verification failure in INIT_EXEC lowering that has occurred since the lowering was moved from SILowerControlFlow to SIWholeQuadMode in llvm#94452.
1 parent 9ae24c9 commit 74aa4cc

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,8 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
16761676
if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
16771677
LowerToMovInstrs.empty() && KillInstrs.empty()) {
16781678
lowerLiveMaskQueries();
1679+
if (!InitExecInstrs.empty())
1680+
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
16791681
return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
16801682
}
16811683

llvm/test/CodeGen/AMDGPU/wqm.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3463,6 +3463,38 @@ bb:
34633463
ret void
34643464
}
34653465

3466+
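; Regression test: an init.exec call followed by a switch, in a function that needs no WQM or strict-mode handling, used to fail machine verification.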
3467+
define amdgpu_gs void @wqm_init_exec_switch(i32 %arg) {
3468+
; GFX9-W64-LABEL: wqm_init_exec_switch:
3469+
; GFX9-W64: ; %bb.0:
3470+
; GFX9-W64-NEXT: s_mov_b64 exec, 0
3471+
; GFX9-W64-NEXT: v_cmp_lt_i32_e32 vcc, 0, v0
3472+
; GFX9-W64-NEXT: s_and_saveexec_b64 s[0:1], vcc
3473+
; GFX9-W64-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
3474+
; GFX9-W64-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
3475+
; GFX9-W64-NEXT: s_endpgm
3476+
;
3477+
; GFX10-W32-LABEL: wqm_init_exec_switch:
3478+
; GFX10-W32: ; %bb.0:
3479+
; GFX10-W32-NEXT: s_mov_b32 exec_lo, 0
3480+
; GFX10-W32-NEXT: s_mov_b32 s0, exec_lo
3481+
; GFX10-W32-NEXT: v_cmpx_lt_i32_e32 0, v0
3482+
; GFX10-W32-NEXT: s_xor_b32 s0, exec_lo, s0
3483+
; GFX10-W32-NEXT: s_andn2_saveexec_b32 s0, s0
3484+
; GFX10-W32-NEXT: s_endpgm
3485+
call void @llvm.amdgcn.init.exec(i64 0)
3486+
switch i32 %arg, label %bb1 [
3487+
i32 0, label %bb3
3488+
i32 1, label %bb2
3489+
]
3490+
bb1:
3491+
ret void
3492+
bb2:
3493+
ret void
3494+
bb3:
3495+
ret void
3496+
}
3497+
34663498
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
34673499
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
34683500

0 commit comments

Comments
 (0)