Skip to content

Commit 6f44d41

Browse files
AMDGPU/GlobalISel: Disable LCSSA pass
Disable the LCSSA pass in preparation for implementing temporal divergence lowering in AMDGPU divergence lowering. This breaks all cases where SGPR or i1 values are used outside of a cycle with a divergent exit. Regenerate regression tests for AMDGPU divergence lowering with LCSSA disabled. Update IntrinsicLaneMaskAnalyzer to stop tracking LCSSA phis that are lane masks.
1 parent 2477f82 commit 6f44d41

13 files changed

+1355
-1098
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,25 +91,17 @@ void IntrinsicLaneMaskAnalyzer::initLaneMaskIntrinsics(MachineFunction &MF) {
9191
GIntrinsic *GI = dyn_cast<GIntrinsic>(&MI);
9292
if (GI && GI->is(Intrinsic::amdgcn_if_break)) {
9393
S32S64LaneMask.insert(MI.getOperand(3).getReg());
94-
findLCSSAPhi(MI.getOperand(0).getReg());
94+
S32S64LaneMask.insert(MI.getOperand(0).getReg());
9595
}
9696

9797
if (MI.getOpcode() == AMDGPU::SI_IF ||
9898
MI.getOpcode() == AMDGPU::SI_ELSE) {
99-
findLCSSAPhi(MI.getOperand(0).getReg());
99+
S32S64LaneMask.insert(MI.getOperand(0).getReg());
100100
}
101101
}
102102
}
103103
}
104104

105-
void IntrinsicLaneMaskAnalyzer::findLCSSAPhi(Register Reg) {
106-
S32S64LaneMask.insert(Reg);
107-
for (const MachineInstr &LCSSAPhi : MRI.use_instructions(Reg)) {
108-
if (LCSSAPhi.isPHI())
109-
S32S64LaneMask.insert(LCSSAPhi.getOperand(0).getReg());
110-
}
111-
}
112-
113105
static LLT getReadAnyLaneSplitTy(LLT Ty) {
114106
if (Ty.isVector()) {
115107
LLT ElTy = Ty.getElementType();

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ class IntrinsicLaneMaskAnalyzer {
4747

4848
private:
4949
void initLaneMaskIntrinsics(MachineFunction &MF);
50-
// This will not be needed when we turn off LCSSA for global-isel.
51-
void findLCSSAPhi(Register Reg);
5250
};
5351

5452
void buildReadAnyLane(MachineIRBuilder &B, Register SgprDst, Register VgprSrc,

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,11 @@ bool GCNPassConfig::addPreISel() {
13821382
// control flow modifications.
13831383
addPass(createAMDGPURewriteUndefForPHILegacyPass());
13841384

1385-
addPass(createLCSSAPass());
1385+
// SDAG requires LCSSA, GlobalISel does not. Disable LCSSA for -global-isel
1386+
// with -new-reg-bank-select and without any of the fallback options.
1387+
if (!getCGPassBuilderOption().EnableGlobalISelOption ||
1388+
!isGlobalISelAbortEnabled() || !NewRegBankSelect)
1389+
addPass(createLCSSAPass());
13861390

13871391
if (TM->getOptLevel() > CodeGenOptLevel::Less)
13881392
addPass(&AMDGPUPerfHintAnalysisLegacyID);
@@ -2086,7 +2090,9 @@ void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
20862090
// control flow modifications.
20872091
addPass(AMDGPURewriteUndefForPHIPass());
20882092

2089-
addPass(LCSSAPass());
2093+
if (!getCGPassBuilderOption().EnableGlobalISelOption ||
2094+
!isGlobalISelAbortEnabled() || !NewRegBankSelect)
2095+
addPass(LCSSAPass());
20902096

20912097
if (TM.getOptLevel() > CodeGenOptLevel::Less)
20922098
addPass(AMDGPUPerfHintAnalysisPass(TM));

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir

Lines changed: 143 additions & 171 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll

Lines changed: 97 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,55 @@ exit:
178178
ret void
179179
}
180180

181+
define void @divergent_i1_xor_used_outside_loop_twice(float %val, float %pre.cond.val, ptr %addr, ptr %addr2) {
182+
; GFX10-LABEL: divergent_i1_xor_used_outside_loop_twice:
183+
; GFX10: ; %bb.0: ; %entry
184+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185+
; GFX10-NEXT: s_mov_b32 s4, 0
186+
; GFX10-NEXT: v_cmp_lt_f32_e64 s5, 1.0, v1
187+
; GFX10-NEXT: v_mov_b32_e32 v1, s4
188+
; GFX10-NEXT: ; implicit-def: $sgpr6
189+
; GFX10-NEXT: .LBB3_1: ; %loop
190+
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
191+
; GFX10-NEXT: v_cvt_f32_u32_e32 v6, v1
192+
; GFX10-NEXT: s_xor_b32 s5, s5, -1
193+
; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v1
194+
; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, v6, v0
195+
; GFX10-NEXT: s_or_b32 s4, vcc_lo, s4
196+
; GFX10-NEXT: s_andn2_b32 s6, s6, exec_lo
197+
; GFX10-NEXT: s_and_b32 s7, exec_lo, s5
198+
; GFX10-NEXT: s_or_b32 s6, s6, s7
199+
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
200+
; GFX10-NEXT: s_cbranch_execnz .LBB3_1
201+
; GFX10-NEXT: ; %bb.2: ; %exit
202+
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
203+
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s6
204+
; GFX10-NEXT: v_cndmask_b32_e64 v1, -1.0, 2.0, s6
205+
; GFX10-NEXT: flat_store_dword v[2:3], v0
206+
; GFX10-NEXT: flat_store_dword v[4:5], v1
207+
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
208+
; GFX10-NEXT: s_setpc_b64 s[30:31]
209+
entry:
210+
%pre.cond = fcmp ogt float %pre.cond.val, 1.0
211+
br label %loop
212+
213+
loop:
214+
%counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
215+
%bool.counter = phi i1 [ %pre.cond, %entry ], [ %neg.bool.counter, %loop ]
216+
%neg.bool.counter = xor i1 %bool.counter, true
217+
%f.counter = uitofp i32 %counter to float
218+
%cond = fcmp ogt float %f.counter, %val
219+
%counter.plus.1 = add i32 %counter, 1
220+
br i1 %cond, label %exit, label %loop
221+
222+
exit:
223+
%select = select i1 %neg.bool.counter, float 1.000000e+00, float 0.000000e+00
224+
store float %select, ptr %addr
225+
%select2 = select i1 %neg.bool.counter, float 2.000000e+00, float -1.000000e+00
226+
store float %select2, ptr %addr2
227+
ret void
228+
}
229+
181230
;void xor(int num_elts, int* a, int* addr) {
182231
;for(int i=0; i<num_elts; ++i) {
183232
; if(a[i]==0)
@@ -195,15 +244,15 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
195244
; GFX10-NEXT: s_mov_b32 s5, 0
196245
; GFX10-NEXT: s_mov_b32 s6, -1
197246
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
198-
; GFX10-NEXT: s_cbranch_execz .LBB3_6
247+
; GFX10-NEXT: s_cbranch_execz .LBB4_6
199248
; GFX10-NEXT: ; %bb.1: ; %loop.start.preheader
200249
; GFX10-NEXT: v_mov_b32_e32 v5, s5
201250
; GFX10-NEXT: ; implicit-def: $sgpr6
202251
; GFX10-NEXT: ; implicit-def: $sgpr7
203252
; GFX10-NEXT: ; implicit-def: $sgpr8
204-
; GFX10-NEXT: s_branch .LBB3_3
205-
; GFX10-NEXT: .LBB3_2: ; %Flow
206-
; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
253+
; GFX10-NEXT: s_branch .LBB4_3
254+
; GFX10-NEXT: .LBB4_2: ; %Flow
255+
; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
207256
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9
208257
; GFX10-NEXT: s_xor_b32 s9, s8, -1
209258
; GFX10-NEXT: s_and_b32 s10, exec_lo, s7
@@ -212,8 +261,8 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
212261
; GFX10-NEXT: s_and_b32 s9, exec_lo, s9
213262
; GFX10-NEXT: s_or_b32 s6, s6, s9
214263
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
215-
; GFX10-NEXT: s_cbranch_execz .LBB3_5
216-
; GFX10-NEXT: .LBB3_3: ; %loop.start
264+
; GFX10-NEXT: s_cbranch_execz .LBB4_5
265+
; GFX10-NEXT: .LBB4_3: ; %loop.start
217266
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
218267
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
219268
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -228,9 +277,9 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
228277
; GFX10-NEXT: s_waitcnt vmcnt(0)
229278
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
230279
; GFX10-NEXT: s_and_saveexec_b32 s9, vcc_lo
231-
; GFX10-NEXT: s_cbranch_execz .LBB3_2
280+
; GFX10-NEXT: s_cbranch_execz .LBB4_2
232281
; GFX10-NEXT: ; %bb.4: ; %loop.cond
233-
; GFX10-NEXT: ; in Loop: Header=BB3_3 Depth=1
282+
; GFX10-NEXT: ; in Loop: Header=BB4_3 Depth=1
234283
; GFX10-NEXT: v_add_nc_u32_e32 v6, 1, v5
235284
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
236285
; GFX10-NEXT: s_andn2_b32 s8, s8, exec_lo
@@ -240,20 +289,20 @@ define void @divergent_i1_xor_used_outside_loop_larger_loop_body(i32 %num.elts,
240289
; GFX10-NEXT: s_and_b32 s11, exec_lo, vcc_lo
241290
; GFX10-NEXT: s_or_b32 s8, s8, s10
242291
; GFX10-NEXT: s_or_b32 s7, s7, s11
243-
; GFX10-NEXT: s_branch .LBB3_2
244-
; GFX10-NEXT: .LBB3_5: ; %loop.exit.guard
292+
; GFX10-NEXT: s_branch .LBB4_2
293+
; GFX10-NEXT: .LBB4_5: ; %loop.exit.guard
245294
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
246295
; GFX10-NEXT: s_andn2_b32 s5, -1, exec_lo
247296
; GFX10-NEXT: s_and_b32 s6, exec_lo, s6
248297
; GFX10-NEXT: s_or_b32 s6, s5, s6
249-
; GFX10-NEXT: .LBB3_6: ; %Flow1
298+
; GFX10-NEXT: .LBB4_6: ; %Flow1
250299
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
251300
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
252-
; GFX10-NEXT: s_cbranch_execz .LBB3_8
301+
; GFX10-NEXT: s_cbranch_execz .LBB4_8
253302
; GFX10-NEXT: ; %bb.7: ; %block.after.loop
254303
; GFX10-NEXT: v_mov_b32_e32 v0, 5
255304
; GFX10-NEXT: flat_store_dword v[3:4], v0
256-
; GFX10-NEXT: .LBB3_8: ; %exit
305+
; GFX10-NEXT: .LBB4_8: ; %exit
257306
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
258307
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
259308
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -299,51 +348,51 @@ define void @divergent_i1_icmp_used_outside_loop(i32 %v0, i32 %v1, ptr addrspace
299348
; GFX10-NEXT: s_mov_b32 s5, 0
300349
; GFX10-NEXT: ; implicit-def: $sgpr6
301350
; GFX10-NEXT: v_mov_b32_e32 v4, s5
302-
; GFX10-NEXT: s_branch .LBB4_2
303-
; GFX10-NEXT: .LBB4_1: ; %Flow
304-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
351+
; GFX10-NEXT: s_branch .LBB5_2
352+
; GFX10-NEXT: .LBB5_1: ; %Flow
353+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
305354
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8
306355
; GFX10-NEXT: s_and_b32 s4, exec_lo, s7
307356
; GFX10-NEXT: s_or_b32 s5, s4, s5
308357
; GFX10-NEXT: s_andn2_b32 s4, s6, exec_lo
309358
; GFX10-NEXT: s_and_b32 s6, exec_lo, vcc_lo
310359
; GFX10-NEXT: s_or_b32 s6, s4, s6
311360
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5
312-
; GFX10-NEXT: s_cbranch_execz .LBB4_6
313-
; GFX10-NEXT: .LBB4_2: ; %cond.block.0
361+
; GFX10-NEXT: s_cbranch_execz .LBB5_6
362+
; GFX10-NEXT: .LBB5_2: ; %cond.block.0
314363
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
315364
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v4
316365
; GFX10-NEXT: s_and_saveexec_b32 s7, vcc_lo
317-
; GFX10-NEXT: s_cbranch_execz .LBB4_4
366+
; GFX10-NEXT: s_cbranch_execz .LBB5_4
318367
; GFX10-NEXT: ; %bb.3: ; %if.block.0
319-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
368+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
320369
; GFX10-NEXT: v_ashrrev_i32_e32 v5, 31, v4
321370
; GFX10-NEXT: v_lshlrev_b64 v[8:9], 2, v[4:5]
322371
; GFX10-NEXT: v_add_co_u32 v8, s4, v2, v8
323372
; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s4, v3, v9, s4
324373
; GFX10-NEXT: global_store_dword v[8:9], v4, off
325-
; GFX10-NEXT: .LBB4_4: ; %loop.break.block
326-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
374+
; GFX10-NEXT: .LBB5_4: ; %loop.break.block
375+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
327376
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
328377
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s7
329378
; GFX10-NEXT: v_cmp_ne_u32_e64 s4, v1, v4
330379
; GFX10-NEXT: s_mov_b32 s7, -1
331380
; GFX10-NEXT: s_and_saveexec_b32 s8, s4
332-
; GFX10-NEXT: s_cbranch_execz .LBB4_1
381+
; GFX10-NEXT: s_cbranch_execz .LBB5_1
333382
; GFX10-NEXT: ; %bb.5: ; %loop.cond
334-
; GFX10-NEXT: ; in Loop: Header=BB4_2 Depth=1
383+
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
335384
; GFX10-NEXT: v_add_nc_u32_e32 v4, 1, v4
336385
; GFX10-NEXT: s_andn2_b32 s4, -1, exec_lo
337386
; GFX10-NEXT: s_and_b32 s7, exec_lo, 0
338387
; GFX10-NEXT: s_or_b32 s7, s4, s7
339-
; GFX10-NEXT: s_branch .LBB4_1
340-
; GFX10-NEXT: .LBB4_6: ; %cond.block.1
388+
; GFX10-NEXT: s_branch .LBB5_1
389+
; GFX10-NEXT: .LBB5_6: ; %cond.block.1
341390
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5
342391
; GFX10-NEXT: s_and_saveexec_b32 s4, s6
343-
; GFX10-NEXT: s_cbranch_execz .LBB4_8
392+
; GFX10-NEXT: s_cbranch_execz .LBB5_8
344393
; GFX10-NEXT: ; %bb.7: ; %if.block.1
345394
; GFX10-NEXT: global_store_dword v[6:7], v4, off
346-
; GFX10-NEXT: .LBB4_8: ; %exit
395+
; GFX10-NEXT: .LBB5_8: ; %exit
347396
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
348397
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
349398
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -408,9 +457,9 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
408457
; GFX10-NEXT: v_mov_b32_e32 v5, s0
409458
; GFX10-NEXT: ; implicit-def: $sgpr1
410459
; GFX10-NEXT: ; implicit-def: $sgpr2
411-
; GFX10-NEXT: s_branch .LBB5_2
412-
; GFX10-NEXT: .LBB5_1: ; %loop.cond
413-
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
460+
; GFX10-NEXT: s_branch .LBB6_2
461+
; GFX10-NEXT: .LBB6_1: ; %loop.cond
462+
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
414463
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
415464
; GFX10-NEXT: v_cmp_lt_i32_e32 vcc_lo, v5, v0
416465
; GFX10-NEXT: v_add_nc_u32_e32 v5, 1, v5
@@ -421,16 +470,16 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
421470
; GFX10-NEXT: s_or_b32 s3, s3, s4
422471
; GFX10-NEXT: s_or_b32 s1, s1, s4
423472
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
424-
; GFX10-NEXT: s_cbranch_execz .LBB5_4
425-
; GFX10-NEXT: .LBB5_2: ; %loop.start
473+
; GFX10-NEXT: s_cbranch_execz .LBB6_4
474+
; GFX10-NEXT: .LBB6_2: ; %loop.start
426475
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
427476
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
428477
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
429478
; GFX10-NEXT: s_or_b32 s2, s2, s4
430479
; GFX10-NEXT: s_and_saveexec_b32 s4, s3
431-
; GFX10-NEXT: s_cbranch_execz .LBB5_1
480+
; GFX10-NEXT: s_cbranch_execz .LBB6_1
432481
; GFX10-NEXT: ; %bb.3: ; %is.eq.zero
433-
; GFX10-NEXT: ; in Loop: Header=BB5_2 Depth=1
482+
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
434483
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
435484
; GFX10-NEXT: s_andn2_b32 s2, s2, exec_lo
436485
; GFX10-NEXT: v_lshlrev_b64 v[6:7], 2, v[5:6]
@@ -442,8 +491,8 @@ define amdgpu_ps void @divergent_i1_freeze_used_outside_loop(i32 %n, ptr addrspa
442491
; GFX10-NEXT: s_and_b32 s3, exec_lo, vcc_lo
443492
; GFX10-NEXT: s_or_b32 s2, s2, s3
444493
; GFX10-NEXT: ; implicit-def: $sgpr3
445-
; GFX10-NEXT: s_branch .LBB5_1
446-
; GFX10-NEXT: .LBB5_4: ; %exit
494+
; GFX10-NEXT: s_branch .LBB6_1
495+
; GFX10-NEXT: .LBB6_4: ; %exit
447496
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
448497
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s1
449498
; GFX10-NEXT: flat_store_dword v[3:4], v0
@@ -484,9 +533,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
484533
; GFX10-NEXT: ; implicit-def: $sgpr2
485534
; GFX10-NEXT: ; implicit-def: $sgpr3
486535
; GFX10-NEXT: v_mov_b32_e32 v6, s0
487-
; GFX10-NEXT: s_branch .LBB6_2
488-
; GFX10-NEXT: .LBB6_1: ; %Flow
489-
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
536+
; GFX10-NEXT: s_branch .LBB7_2
537+
; GFX10-NEXT: .LBB7_1: ; %Flow
538+
; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
490539
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
491540
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
492541
; GFX10-NEXT: s_and_b32 s4, exec_lo, s2
@@ -495,8 +544,8 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
495544
; GFX10-NEXT: s_and_b32 s4, exec_lo, s3
496545
; GFX10-NEXT: s_or_b32 s1, s1, s4
497546
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
498-
; GFX10-NEXT: s_cbranch_execz .LBB6_4
499-
; GFX10-NEXT: .LBB6_2: ; %A
547+
; GFX10-NEXT: s_cbranch_execz .LBB7_4
548+
; GFX10-NEXT: .LBB7_2: ; %A
500549
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
501550
; GFX10-NEXT: v_ashrrev_i32_e32 v7, 31, v6
502551
; GFX10-NEXT: s_andn2_b32 s3, s3, exec_lo
@@ -511,9 +560,9 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
511560
; GFX10-NEXT: s_waitcnt vmcnt(0)
512561
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9
513562
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
514-
; GFX10-NEXT: s_cbranch_execz .LBB6_1
563+
; GFX10-NEXT: s_cbranch_execz .LBB7_1
515564
; GFX10-NEXT: ; %bb.3: ; %loop.body
516-
; GFX10-NEXT: ; in Loop: Header=BB6_2 Depth=1
565+
; GFX10-NEXT: ; in Loop: Header=BB7_2 Depth=1
517566
; GFX10-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7
518567
; GFX10-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo
519568
; GFX10-NEXT: v_add_nc_u32_e32 v10, 1, v6
@@ -529,16 +578,16 @@ define amdgpu_cs void @loop_with_1break(ptr addrspace(1) %x, ptr addrspace(1) %a
529578
; GFX10-NEXT: s_waitcnt vmcnt(0)
530579
; GFX10-NEXT: v_add_nc_u32_e32 v9, 1, v9
531580
; GFX10-NEXT: global_store_dword v[7:8], v9, off
532-
; GFX10-NEXT: s_branch .LBB6_1
533-
; GFX10-NEXT: .LBB6_4: ; %loop.exit.guard
581+
; GFX10-NEXT: s_branch .LBB7_1
582+
; GFX10-NEXT: .LBB7_4: ; %loop.exit.guard
534583
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
535584
; GFX10-NEXT: s_and_saveexec_b32 s0, s1
536585
; GFX10-NEXT: s_xor_b32 s0, exec_lo, s0
537-
; GFX10-NEXT: s_cbranch_execz .LBB6_6
586+
; GFX10-NEXT: s_cbranch_execz .LBB7_6
538587
; GFX10-NEXT: ; %bb.5: ; %break.body
539588
; GFX10-NEXT: v_mov_b32_e32 v0, 10
540589
; GFX10-NEXT: global_store_dword v[4:5], v0, off
541-
; GFX10-NEXT: .LBB6_6: ; %exit
590+
; GFX10-NEXT: .LBB7_6: ; %exit
542591
; GFX10-NEXT: s_endpgm
543592
entry:
544593
br label %A

0 commit comments

Comments
 (0)