Skip to content

Commit 8632e8b

Browse files
authored
AMDGPU: Fix implicit vcc def to vcc_lo on wave32 targets (#109514)
1 parent f28a035 commit 8632e8b

17 files changed

+219
-171
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4514,7 +4514,6 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
45144514
// of vcc was already added during the initial BuildMI, but we
45154515
// 1) may need to change vcc to vcc_lo to preserve the original register
45164516
// 2) have to preserve the original flags.
4517-
fixImplicitOperands(*Inst32);
45184517
copyFlagsToImplicitVCC(*Inst32, *Src2);
45194518
continue;
45204519
}
@@ -4524,7 +4523,7 @@ MachineInstr *SIInstrInfo::buildShrunkInst(MachineInstr &MI,
45244523
}
45254524

45264525
// FIXME: Losing implicit operands
4527-
4526+
fixImplicitOperands(*Inst32);
45284527
return Inst32;
45294528
}
45304529

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -167,8 +167,8 @@ define void @divergent_i1_phi_used_inside_loop_bigger_loop_body(float %val, floa
167167
; GFX10-NEXT: s_cbranch_execz .LBB3_6
168168
; GFX10-NEXT: .LBB3_2: ; %loop_start
169169
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
170-
; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8
171170
; GFX10-NEXT: s_mov_b32 s7, 1
171+
; GFX10-NEXT: v_cmp_ge_i32_e32 vcc_lo, 0x3e8, v8
172172
; GFX10-NEXT: s_cbranch_vccz .LBB3_4
173173
; GFX10-NEXT: ; %bb.3: ; %else
174174
; GFX10-NEXT: ; in Loop: Header=BB3_2 Depth=1

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,12 +75,12 @@ define void @divergent_i1_phi_used_outside_loop_larger_loop_body(float %val, ptr
7575
; GFX10-NEXT: .LBB1_1: ; %loop.cond
7676
; GFX10-NEXT: ; in Loop: Header=BB1_2 Depth=1
7777
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
78-
; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0
7978
; GFX10-NEXT: v_add_co_u32 v1, s4, v1, 4
79+
; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0
8080
; GFX10-NEXT: v_add_co_ci_u32_e64 v2, s4, 0, v2, s4
81-
; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0
8281
; GFX10-NEXT: s_andn2_b32 s7, s5, exec_lo
8382
; GFX10-NEXT: s_and_b32 s8, exec_lo, s6
83+
; GFX10-NEXT: v_cmp_le_i32_e32 vcc_lo, 10, v0
8484
; GFX10-NEXT: s_or_b32 s4, s7, s8
8585
; GFX10-NEXT: s_cbranch_vccz .LBB1_4
8686
; GFX10-NEXT: .LBB1_2: ; %loop.start
@@ -191,9 +191,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
191191
; GFX10-LABEL: divergent_i1_xor_used_outside_loop_larger_loop_body:
192192
; GFX10: ; %bb.0: ; %entry
193193
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194-
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
195194
; GFX10-NEXT: s_mov_b32 s5, 0
196195
; GFX10-NEXT: s_mov_b32 s6, -1
196+
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
197197
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
198198
; GFX10-NEXT: s_cbranch_execz .LBB3_6
199199
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1323,9 +1323,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
13231323
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
13241324
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
13251325
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
1326-
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
13271326
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
13281327
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
1328+
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
13291329
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
13301330
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
13311331
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB2_2
@@ -1451,10 +1451,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
14511451
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
14521452
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
14531453
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
1454-
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
1455-
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
14561454
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
14571455
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
1456+
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
14581457
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
14591458
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
14601459
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB2_2
@@ -1587,9 +1586,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
15871586
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
15881587
; GFX1232_DPP-NEXT: s_wait_alu 0xfffe
15891588
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
1590-
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
15911589
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
15921590
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
1591+
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
15931592
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
15941593
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
15951594
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB2_2
@@ -3228,8 +3227,8 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
32283227
; GFX1032_DPP-NEXT: v_writelane_b32 v2, s8, 16
32293228
; GFX1032_DPP-NEXT: v_writelane_b32 v1, s3, 16
32303229
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
3231-
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
32323230
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
3231+
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
32333232
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
32343233
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
32353234
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB5_2
@@ -4991,9 +4990,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
49914990
; GFX1032_DPP-NEXT: s_or_saveexec_b32 s0, -1
49924991
; GFX1032_DPP-NEXT: v_writelane_b32 v3, s1, 16
49934992
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s0
4994-
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
49954993
; GFX1032_DPP-NEXT: s_mov_b32 s0, s2
49964994
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
4995+
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
49974996
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr0
49984997
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
49994998
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB8_2
@@ -5119,10 +5118,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
51195118
; GFX1132_DPP-NEXT: s_or_saveexec_b32 s4, -1
51205119
; GFX1132_DPP-NEXT: v_writelane_b32 v3, s5, 16
51215120
; GFX1132_DPP-NEXT: s_mov_b32 exec_lo, s4
5122-
; GFX1132_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
5123-
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
51245121
; GFX1132_DPP-NEXT: s_mov_b32 s4, s6
51255122
; GFX1132_DPP-NEXT: s_mov_b32 s6, -1
5123+
; GFX1132_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
51265124
; GFX1132_DPP-NEXT: ; implicit-def: $vgpr0
51275125
; GFX1132_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
51285126
; GFX1132_DPP-NEXT: s_cbranch_execz .LBB8_2
@@ -5255,9 +5253,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
52555253
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
52565254
; GFX1232_DPP-NEXT: s_wait_alu 0xfffe
52575255
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
5258-
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
52595256
; GFX1232_DPP-NEXT: s_mov_b32 s4, s6
52605257
; GFX1232_DPP-NEXT: s_mov_b32 s6, -1
5258+
; GFX1232_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
52615259
; GFX1232_DPP-NEXT: ; implicit-def: $vgpr0
52625260
; GFX1232_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
52635261
; GFX1232_DPP-NEXT: s_cbranch_execz .LBB8_2
@@ -6938,8 +6936,8 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
69386936
; GFX1032_DPP-NEXT: v_writelane_b32 v2, s8, 16
69396937
; GFX1032_DPP-NEXT: v_writelane_b32 v1, s3, 16
69406938
; GFX1032_DPP-NEXT: s_mov_b32 exec_lo, s2
6941-
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
69426939
; GFX1032_DPP-NEXT: s_mov_b32 s2, -1
6940+
; GFX1032_DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
69436941
; GFX1032_DPP-NEXT: ; implicit-def: $vgpr9_vgpr10
69446942
; GFX1032_DPP-NEXT: s_and_saveexec_b32 s8, vcc_lo
69456943
; GFX1032_DPP-NEXT: s_cbranch_execz .LBB11_2

0 commit comments

Comments
 (0)