Skip to content

Commit 68703a0

Browse files
AMDGPU/GlobalISel: Disable LCSSA pass
Disable the LCSSA pass in preparation for implementing temporal divergence lowering in AMDGPU divergence lowering. This breaks all cases where SGPR or i1 values are used outside of a cycle with a divergent exit. Regenerate the regression tests for AMDGPU divergence lowering with LCSSA disabled. Update IntrinsicLaneMaskAnalyzer to stop tracking LCSSA phis that are lane masks.
1 parent a71c9d8 commit 68703a0

13 files changed

+1352
-1098
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,25 +91,17 @@ void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
9191
GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
9292
if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
9393
S32S64LaneMask.insert(MI.getOperand(3).getReg());
94-
findLCSSAPhi(MI.getOperand(0).getReg());
94+
S32S64LaneMask.insert(MI.getOperand(0).getReg());
9595
}
9696

9797
if (MI.getOpcode() == AMDGPU::SI_IF ||
9898
MI.getOpcode() == AMDGPU::SI_ELSE) {
99-
findLCSSAPhi(MI.getOperand(0).getReg());
99+
S32S64LaneMask.insert(MI.getOperand(0).getReg());
100100
}
101101
}
102102
}
103103
}
104104

105-
void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
106-
S32S64LaneMask.insert(Reg);
107-
for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
108-
if (LCSSAPhi.isPHI())
109-
S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
110-
}
111-
}
112-
113105
static LLT getReadAnyLaneSplitTy(LLT Ty) {
114106
if (Ty.isVector()) {
115107
LLT ElTy = Ty.getElementType();

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ class IntrinsicLaneMaskAnalyzer {
4747

4848
private:
4949
void initLaneMaskIntrinsics(MachineFunction &MF);
50-
// This will not be needed when we turn off LCSSA for global-isel.
51-
void findLCSSAPhi(Register Reg);
5250
};
5351

5452
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,7 +1383,11 @@ bool GCNPassConfig::addPreISel() {
13831383
// control flow modifications.
13841384
addPass(createAMDGPURewriteUndefForPHILegacyPass());
13851385

1386-
addPass(createLCSSAPass());
1386+
// SDAG requires LCSSA, GlobalISel does not. Disable LCSSA for -global-isel
1387+
// with -new-reg-bank-select and without any of the fallback options.
1388+
if (!getCGPassBuilderOption().EnableGlobalISelOption ||
1389+
!isGlobalISelAbortEnabled() || !NewRegBankSelect)
1390+
addPass(createLCSSAPass());
13871391

13881392
if (TM->getOptLevel() > CodeGenOptLevel::Less)
13891393
addPass(&AMDGPUPerfHintAnalysisLegacyID);
@@ -2087,7 +2091,9 @@ void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
20872091
// control flow modifications.
20882092
addPass(AMDGPURewriteUndefForPHIPass());
20892093

2090-
addPass(LCSSAPass());
2094+
if (!getCGPassBuilderOption().EnableGlobalISelOption ||
2095+
!isGlobalISelAbortEnabled() || !NewRegBankSelect)
2096+
addPass(LCSSAPass());
20912097

20922098
if (TM.getOptLevel() > CodeGenOptLevel::Less)
20932099
addPass(AMDGPUPerfHintAnalysisPass(TM));

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir

Lines changed: 143 additions & 171 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,55 @@ exit:
178178
ret void
179179
}
180180

181+
define void @divergent_i1_xor_used_outside_loop_twice(float %val, float %pre.cond.val, ptr %addr, ptr %addr2) {
182+
; GFX10-LABEL: divergent_i1_xor_used_outside_loop_twice:
183+
; GFX10: ; %bb.0: ; %entry
184+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+
; GFX10-NEXT: s_mov_b32 s4, 0
186+
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
187+
; GFX10-NEXT: v_mov_b32_e32 v1, s4
188+
; GFX10-NEXT: ; implicit-def: $sgpr6
189+
; GFX10-NEXT: .LBB3_1: ; %loop
190+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
191+
; GFX10-NEXT: v_cvt_f32_u32_e32 v6, v1
192+
; GFX10-NEXT: s_xor_b32 s5, s5, -1
193+
; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
194+
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v0
195+
; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
196+
; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
197+
; GFX10-NEXT: s_and_b32 s7, exec_lo, s5
198+
; GFX10-NEXT: s_or_b32 s6, s6, s7
199+
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
200+
; GFX10-NEXT: s_cbranch_execnz .LBB3_1
201+
; GFX10-NEXT: ; %bb.2: ; %exit
202+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
203+
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6
204+
; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, 2.0, s6
205+
; GFX10-NEXT: flat_store_dword v[2:3], v0
206+
; GFX10-NEXT: flat_store_dword v[4:5], v1
207+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
208+
; GFX10-NEXT: s_setpc_b64 s[30:31]
209+
entry:
210+
%pre.cond = fcmp ogt float %pre.cond.val, 1.0
211+
br label %loop
212+
213+
loop:
214+
%counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
215+
%bool.counter = phi i1 [ %pre.cond, %entry ], [ %neg.bool.counter, %loop ]
216+
%neg.bool.counter = xor i1 %bool.counter, true
217+
%f.counter = uitofp i32 %counter to float
218+
%cond = fcmp ogt float %f.counter, %val
219+
%counter.plus.1 = add i32 %counter, 1
220+
br i1 %cond, label %exit, label %loop
221+
222+
exit:
223+
%select = select i1 %neg.bool.counter, float 1.000000e+00, float 0.000000e+00
224+
store float %select, ptr %addr
225+
%select2 = select i1 %neg.bool.counter, float 2.000000e+00, float -1.000000e+00
226+
store float %select2, ptr %addr2
227+
ret void
228+
}
229+
181230
;void xor(int num_elts, int* a, int* addr) {
182231
;for(int i=0; i<num_elts; ++i) {
183232
; if(a[i]==0)
@@ -195,15 +244,15 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
195244
; GFX10-NEXT: s_mov_b32 s5, 0
196245
; GFX10-NEXT: s_mov_b32 s6, -1
197246
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
198-
; GFX10-NEXT: s_cbranch_execz .LBB3_6
247+
; GFX10-NEXT: s_cbranch_execz .LBB4_6
199248
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
200249
; GFX10-NEXT: v_mov_b32_e32 v5, s5
201250
; GFX10-NEXT: ; implicit-def: $sgpr6
202251
; GFX10-NEXT: ; implicit-def: $sgpr7
203252
; GFX10-NEXT: ; implicit-def: $sgpr8
204-
; GFX10-NEXT: s_branch .LBB3_3
205-
; GFX10-NEXT: .LBB3_2: ; %Flow
206-
; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
253+
; GFX10-NEXT: s_branch .LBB4_3
254+
; GFX10-NEXT: .LBB4_2: ; %Flow
255+
; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
207256
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9
208257
; GFX10-NEXT: s_xor_b32 s9, s8, -1
209258
; GFX10-NEXT: s_and_b32 s10, exec_lo, s7
@@ -212,8 +261,8 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
212261
; GFX10-NEXT: s_and_b32 s9, exec_lo, s9
213262
; GFX10-NEXT: s_or_b32 s6, s6, s9
214263
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
215-
; GFX10-NEXT: s_cbranch_execz .LBB3_5
216-
; GFX10-NEXT: .LBB3_3: ; %loop.start
264+
; GFX10-NEXT: s_cbranch_execz .LBB4_5
265+
; GFX10-NEXT: .LBB4_3: ; %loop.start
217266
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
218267
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
219268
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -228,9 +277,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
228277
; GFX10-NEXT: s_waitcnt vmcnt(0)
229278
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
230279
; GFX10-NEXT: s_and_saveexec_b32 s9, vcc_lo
231-
; GFX10-NEXT: s_cbranch_execz .LBB3_2
280+
; GFX10-NEXT: s_cbranch_execz .LBB4_2
232281
; GFX10-NEXT: ; %bb.4: ; %loop.cond
233-
; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
282+
; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
234283
; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v5
235284
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
236285
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -240,20 +289,20 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
240289
; GFX10-NEXT: s_and_b32 s11, exec_lo, vcc_lo
241290
; GFX10-NEXT: s_or_b32 s8, s8, s10
242291
; GFX10-NEXT: s_or_b32 s7, s7, s11
243-
; GFX10-NEXT: s_branch .LBB3_2
244-
; GFX10-NEXT: .LBB3_5: ; %loop.exit.guard
292+
; GFX10-NEXT: s_branch .LBB4_2
293+
; GFX10-NEXT: .LBB4_5: ; %loop.exit.guard
245294
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
246295
; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo
247296
; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
248297
; GFX10-NEXT: s_or_b32 s6, s5, s6
249-
; GFX10-NEXT: .LBB3_6: ; %Flow1
298+
; GFX10-NEXT: .LBB4_6: ; %Flow1
250299
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
251300
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
252-
; GFX10-NEXT: s_cbranch_execz .LBB3_8
301+
; GFX10-NEXT: s_cbranch_execz .LBB4_8
253302
; GFX10-NEXT: ; %bb.7: ; %block.after.loop
254303
; GFX10-NEXT: v_mov_b32_e32 v0, 5
255304
; GFX10-NEXT: flat_store_dword v[3:4], v0
256-
; GFX10-NEXT: .LBB3_8: ; %exit
305+
; GFX10-NEXT: .LBB4_8: ; %exit
257306
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
258307
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
259308
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -299,53 +348,53 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
299348
; GFX10-NEXT: s_mov_b32 s5, 0
300349
; GFX10-NEXT: ; implicit-def: $sgpr6
301350
; GFX10-NEXT: v_mov_b32_e32 v5, s5
302-
; GFX10-NEXT: s_branch .LBB4_2
303-
; GFX10-NEXT: .LBB4_1: ; %Flow
304-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
351+
; GFX10-NEXT: s_branch .LBB5_2
352+
; GFX10-NEXT: .LBB5_1: ; %Flow
353+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
305354
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
306355
; GFX10-NEXT: s_and_b32 s4, exec_lo, s7
307356
; GFX10-NEXT: s_or_b32 s5, s4, s5
308357
; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo
309358
; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
310359
; GFX10-NEXT: s_or_b32 s6, s4, s6
311360
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
312-
; GFX10-NEXT: s_cbranch_execz .LBB4_6
313-
; GFX10-NEXT: .LBB4_2: ; %cond.block.0
361+
; GFX10-NEXT: s_cbranch_execz .LBB5_6
362+
; GFX10-NEXT: .LBB5_2: ; %cond.block.0
314363
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
315364
; GFX10-NEXT: v_mov_b32_e32 v4, v5
316365
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4
317366
; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
318-
; GFX10-NEXT: s_cbranch_execz .LBB4_4
367+
; GFX10-NEXT: s_cbranch_execz .LBB5_4
319368
; GFX10-NEXT: ; %bb.3: ; %if.block.0
320-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
369+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
321370
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
322371
; GFX10-NEXT: v_lshlrev_b64 v[8:9], 2, v[4:5]
323372
; GFX10-NEXT: v_add_co_u32 v8, s4, v2, v8
324373
; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
325374
; GFX10-NEXT: global_store_dword v[8:9], v4, off
326-
; GFX10-NEXT: .LBB4_4: ; %loop.break.block
327-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
375+
; GFX10-NEXT: .LBB5_4: ; %loop.break.block
376+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
328377
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
329378
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
330379
; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4
331380
; GFX10-NEXT: s_mov_b32 s7, -1
332381
; GFX10-NEXT: ; implicit-def: $vgpr5
333382
; GFX10-NEXT: s_and_saveexec_b32 s8, s4
334-
; GFX10-NEXT: s_cbranch_execz .LBB4_1
383+
; GFX10-NEXT: s_cbranch_execz .LBB5_1
335384
; GFX10-NEXT: ; %bb.5: ; %loop.cond
336-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
385+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
337386
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v4
338387
; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo
339388
; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
340389
; GFX10-NEXT: s_or_b32 s7, s4, s7
341-
; GFX10-NEXT: s_branch .LBB4_1
342-
; GFX10-NEXT: .LBB4_6: ; %cond.block.1
390+
; GFX10-NEXT: s_branch .LBB5_1
391+
; GFX10-NEXT: .LBB5_6: ; %cond.block.1
343392
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
344393
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
345-
; GFX10-NEXT: s_cbranch_execz .LBB4_8
394+
; GFX10-NEXT: s_cbranch_execz .LBB5_8
346395
; GFX10-NEXT: ; %bb.7: ; %if.block.1
347396
; GFX10-NEXT: global_store_dword v[6:7], v4, off
348-
; GFX10-NEXT: .LBB4_8: ; %exit
397+
; GFX10-NEXT: .LBB5_8: ; %exit
349398
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
350399
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
351400
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -410,9 +459,9 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
410459
; GFX10-NEXT: v_mov_b32_e32 v5, s0
411460
; GFX10-NEXT: ; implicit-def: $sgpr1
412461
; GFX10-NEXT: ; implicit-def: $sgpr2
413-
; GFX10-NEXT: s_branch .LBB5_2
414-
; GFX10-NEXT: .LBB5_1: ; %loop.cond
415-
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
462+
; GFX10-NEXT: s_branch .LBB6_2
463+
; GFX10-NEXT: .LBB6_1: ; %loop.cond
464+
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
416465
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
417466
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
418467
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v5
@@ -423,16 +472,16 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
423472
; GFX10-NEXT: s_or_b32 s3, s3, s4
424473
; GFX10-NEXT: s_or_b32 s1, s1, s4
425474
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
426-
; GFX10-NEXT: s_cbranch_execz .LBB5_4
427-
; GFX10-NEXT: .LBB5_2: ; %loop.start
475+
; GFX10-NEXT: s_cbranch_execz .LBB6_4
476+
; GFX10-NEXT: .LBB6_2: ; %loop.start
428477
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
429478
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
430479
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
431480
; GFX10-NEXT: s_or_b32 s2, s2, s4
432481
; GFX10-NEXT: s_and_saveexec_b32 s4, s3
433-
; GFX10-NEXT: s_cbranch_execz .LBB5_1
482+
; GFX10-NEXT: s_cbranch_execz .LBB6_1
434483
; GFX10-NEXT: ; %bb.3: ; %is.eq.zero
435-
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
484+
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
436485
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
437486
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
438487
; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6]
@@ -444,8 +493,8 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
444493
; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo
445494
; GFX10-NEXT: s_or_b32 s2, s2, s3
446495
; GFX10-NEXT: ; implicit-def: $sgpr3
447-
; GFX10-NEXT: s_branch .LBB5_1
448-
; GFX10-NEXT: .LBB5_4: ; %exit
496+
; GFX10-NEXT: s_branch .LBB6_1
497+
; GFX10-NEXT: .LBB6_4: ; %exit
449498
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
450499
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s1
451500
; GFX10-NEXT: flat_store_dword v[3:4], v0
@@ -486,9 +535,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
486535
; GFX10-NEXT: ; implicit-def: $sgpr2
487536
; GFX10-NEXT: ; implicit-def: $sgpr3
488537
; GFX10-NEXT: v_mov_b32_e32 v6, s0
489-
; GFX10-NEXT: s_branch .LBB6_2
490-
; GFX10-NEXT: .LBB6_1: ; %Flow
491-
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
538+
; GFX10-NEXT: s_branch .LBB7_2
539+
; GFX10-NEXT: .LBB7_1: ; %Flow
540+
; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
492541
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
493542
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
494543
; GFX10-NEXT: s_and_b32 s4, exec_lo, s2
@@ -497,8 +546,8 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
497546
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
498547
; GFX10-NEXT: s_or_b32 s1, s1, s4
499548
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
500-
; GFX10-NEXT: s_cbranch_execz .LBB6_4
501-
; GFX10-NEXT: .LBB6_2: ; %A
549+
; GFX10-NEXT: s_cbranch_execz .LBB7_4
550+
; GFX10-NEXT: .LBB7_2: ; %A
502551
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
503552
; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6
504553
; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
@@ -513,9 +562,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
513562
; GFX10-NEXT: s_waitcnt vmcnt(0)
514563
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
515564
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
516-
; GFX10-NEXT: s_cbranch_execz .LBB6_1
565+
; GFX10-NEXT: s_cbranch_execz .LBB7_1
517566
; GFX10-NEXT: ; %bb.3: ; %loop.body
518-
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
567+
; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
519568
; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7
520569
; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
521570
; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6
@@ -531,16 +580,16 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
531580
; GFX10-NEXT: s_waitcnt vmcnt(0)
532581
; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9
533582
; GFX10-NEXT: global_store_dword v[7:8], v9, off
534-
; GFX10-NEXT: s_branch .LBB6_1
535-
; GFX10-NEXT: .LBB6_4: ; %loop.exit.guard
583+
; GFX10-NEXT: s_branch .LBB7_1
584+
; GFX10-NEXT: .LBB7_4: ; %loop.exit.guard
536585
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
537586
; GFX10-NEXT: s_and_saveexec_b32 s0, s1
538587
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
539-
; GFX10-NEXT: s_cbranch_execz .LBB6_6
588+
; GFX10-NEXT: s_cbranch_execz .LBB7_6
540589
; GFX10-NEXT: ; %bb.5: ; %break.body
541590
; GFX10-NEXT: v_mov_b32_e32 v0, 10
542591
; GFX10-NEXT: global_store_dword v[4:5], v0, off
543-
; GFX10-NEXT: .LBB6_6: ; %exit
592+
; GFX10-NEXT: .LBB7_6: ; %exit
544593
; GFX10-NEXT: s_endpgm
545594
entry:
546595
br label %A

0 commit comments

Comments
 (0)