Skip to content

Commit 1f2a634

Browse files
committed
Reland "[LSR] Do not create duplicated PHI nodes while preserving LCSSA form" (#107380)
Motivating example: https://godbolt.org/z/eb97zrxhx

Here we have 2 induction variables in the loop: one corresponds to the variable i (add rdx, 4), the other to res (add rax, 2). The second induction variable can be removed by the rewriteLoopExitValues() method (the final value of res at loop exit is unroll_iter * -2); however, this doesn't happen because we have duplicated LCSSA phi nodes at the loop exit:

```
; Preheader:
for.body.preheader.new:                           ; preds = %for.body.preheader
  %unroll_iter = and i64 %N, -4
  br label %for.body

; Loop:
for.body:                                         ; preds = %for.body, %for.body.preheader.new
  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.preheader.new ]
  %i.07 = phi i64 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ]
  %inc.3 = add nuw i64 %i.07, 4
  %lsr.iv.next = add nsw i64 %lsr.iv, -2
  %niter.ncmp.3.not = icmp eq i64 %unroll_iter, %inc.3
  br i1 %niter.ncmp.3.not, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !llvm.loop !7

; Exit blocks
for.end.loopexit.unr-lcssa.loopexit:              ; preds = %for.body
  %inc.3.lcssa = phi i64 [ %inc.3, %for.body ]
  %lsr.iv.next.lcssa11 = phi i64 [ %lsr.iv.next, %for.body ]
  %lsr.iv.next.lcssa = phi i64 [ %lsr.iv.next, %for.body ]
  br label %for.end.loopexit.unr-lcssa
```

rewriteLoopExitValues requires the %lsr.iv.next value to have only 2 uses: one in the LCSSA phi node, the other in the induction phi node. Here we have 3 uses of this value because of the duplicated LCSSA nodes, so the transform doesn't apply, which leaves an extra add operation inside the loop.

The proposed solution is to accumulate the inserted instructions that will require an LCSSA form update into a SetVector and then call formLCSSAForInstructions on this SetVector once, so the same instructions are not processed twice.

The reland fixes the issue with the preserve-lcssa.ll test: it fails when the x86_64-unknown-linux-gnu target is unavailable in opt.
The changes are moved into a separate duplicated-phis.ll test with an explicit x86 target requirement, to fix bots that do not build this target.
1 parent 17f0c5d commit 1f2a634

File tree

7 files changed

+51
-48
lines changed

7 files changed

+51
-48
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2186,6 +2186,12 @@ class LSRInstance {
21862186
/// Induction variables that were generated and inserted by the SCEV Expander.
21872187
SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
21882188

2189+
// Inserting instructions in the loop and using them as PHI's input could
2190+
// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
2191+
// corresponding incoming block is not loop exiting). So collect all such
2192+
// instructions to form LCSSA for them later.
2193+
SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2194+
21892195
void OptimizeShadowIV();
21902196
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
21912197
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2276,9 +2282,9 @@ class LSRInstance {
22762282
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
22772283
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
22782284
const Formula &F,
2279-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2285+
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
22802286
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2281-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2287+
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
22822288
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
22832289

22842290
public:
@@ -5858,17 +5864,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
58585864
/// Helper for Rewrite. PHI nodes are special because the use of their operands
58595865
/// effectively happens in their predecessor blocks, so the expression may need
58605866
/// to be expanded in multiple places.
5861-
void LSRInstance::RewriteForPHI(
5862-
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
5863-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5867+
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
5868+
const LSRFixup &LF, const Formula &F,
5869+
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
58645870
DenseMap<BasicBlock *, Value *> Inserted;
58655871

5866-
// Inserting instructions in the loop and using them as PHI's input could
5867-
// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
5868-
// corresponding incoming block is not loop exiting). So collect all such
5869-
// instructions to form LCSSA for them later.
5870-
SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
5871-
58725872
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
58735873
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
58745874
bool needUpdateFixups = false;
@@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(
59395939
// the inserted value.
59405940
if (auto *I = dyn_cast<Instruction>(FullV))
59415941
if (L->contains(I) && !L->contains(BB))
5942-
InsertedNonLCSSAInsts.push_back(I);
5942+
InsertedNonLCSSAInsts.insert(I);
59435943

59445944
PN->setIncomingValue(i, FullV);
59455945
Pair.first->second = FullV;
@@ -5983,16 +5983,14 @@ void LSRInstance::RewriteForPHI(
59835983
}
59845984
}
59855985
}
5986-
5987-
formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
59885986
}
59895987

59905988
/// Emit instructions for the leading candidate expression for this LSRUse (this
59915989
/// is called "expanding"), and update the UserInst to reference the newly
59925990
/// expanded value.
59935991
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
59945992
const Formula &F,
5995-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5993+
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
59965994
// First, find an insertion point that dominates UserInst. For PHI nodes,
59975995
// find the nearest block which dominates all the relevant uses.
59985996
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -6080,6 +6078,9 @@ void LSRInstance::ImplementSolution(
60806078
Changed = true;
60816079
}
60826080

6081+
auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6082+
formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);
6083+
60836084
for (const IVChain &Chain : IVChainVec) {
60846085
GenerateIVChain(Chain, DeadInsts);
60856086
Changed = true;

llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ define ptr @test1() {
2424
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
2525
; CHECK: bbA:
2626
; CHECK-NEXT: switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [
27-
; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
28-
; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
27+
; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
28+
; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
2929
; CHECK-NEXT: ]
3030
; CHECK: bbA.bb89_crit_edge:
3131
; CHECK-NEXT: br label [[BB89:%.*]]
3232
; CHECK: bbB:
3333
; CHECK-NEXT: switch i8 0, label [[BBB_BB89_CRIT_EDGE:%.*]] [
34-
; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
35-
; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
34+
; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
35+
; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
3636
; CHECK-NEXT: ]
3737
; CHECK: bbB.bb89_crit_edge:
3838
; CHECK-NEXT: br label [[BB89]]
@@ -85,23 +85,22 @@ define ptr @test2() {
8585
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
8686
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
8787
; CHECK: loopexit:
88-
; CHECK-NEXT: [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
8988
; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
9089
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
9190
; CHECK: bbA:
9291
; CHECK-NEXT: switch i32 0, label [[BB89:%.*]] [
93-
; CHECK-NEXT: i32 47, label [[BB89]]
94-
; CHECK-NEXT: i32 58, label [[BB89]]
92+
; CHECK-NEXT: i32 47, label [[BB89]]
93+
; CHECK-NEXT: i32 58, label [[BB89]]
9594
; CHECK-NEXT: ]
9695
; CHECK: bbB:
9796
; CHECK-NEXT: switch i8 0, label [[BBB_EXIT_CRIT_EDGE:%.*]] [
98-
; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
99-
; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
97+
; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
98+
; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
10099
; CHECK-NEXT: ]
101100
; CHECK: bbB.exit_crit_edge:
102101
; CHECK-NEXT: br label [[EXIT:%.*]]
103102
; CHECK: bb89:
104-
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ]
103+
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ]
105104
; CHECK-NEXT: br label [[EXIT]]
106105
; CHECK: exit:
107106
; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ]

llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ define amdgpu_kernel void @scaledregtest() local_unnamed_addr {
1616
; CHECK-NEXT: entry:
1717
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1818
; CHECK: loopexit:
19-
; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
2019
; CHECK-NEXT: [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ]
20+
; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
2121
; CHECK-NEXT: br label [[FOR_BODY_1:%.*]]
2222
; CHECK: for.body.1:
2323
; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ]

llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,17 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
2020
; CHECK-NEXT: .p2align 4, 0x90
2121
; CHECK-NEXT: .LBB0_1: # %bb1
2222
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
23-
; CHECK-NEXT: movq %rbx, %rcx
2423
; CHECK-NEXT: testb %al, %al
25-
; CHECK-NEXT: je .LBB0_3
24+
; CHECK-NEXT: je .LBB0_4
2625
; CHECK-NEXT: # %bb.2: # %bb4
2726
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
28-
; CHECK-NEXT: leaq 1(%rcx), %rbx
27+
; CHECK-NEXT: incq %rbx
2928
; CHECK-NEXT: testb %al, %al
3029
; CHECK-NEXT: jne .LBB0_1
31-
; CHECK-NEXT: .LBB0_3: # %bb8
32-
; CHECK-NEXT: movq %rcx, %rax
30+
; CHECK-NEXT: # %bb.3: # %bb8split
31+
; CHECK-NEXT: decq %rbx
32+
; CHECK-NEXT: .LBB0_4: # %bb8
33+
; CHECK-NEXT: movq %rbx, %rax
3334
; CHECK-NEXT: popq %rbx
3435
; CHECK-NEXT: retq
3536
bb:

llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ define i64 @blam(ptr %start, ptr %end, ptr %ptr.2) {
2121
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END:%.*]]
2222
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]]
2323
; CHECK: loop.2.ph:
24-
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
2524
; CHECK-NEXT: [[LSR_IV_NEXT5_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT5]], [[LOOP_1_HEADER]] ]
25+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
2626
; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]]
2727
; CHECK: loop.2.header:
2828
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT5_LCSSA]], [[LOOP_2_PH]] ]

llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,24 @@ define i32 @foo(ptr %A, i32 %t) {
1818
; CHECK-NEXT: entry:
1919
; CHECK-NEXT: br label [[LOOP_32:%.*]]
2020
; CHECK: loop.exit.loopexitsplitsplitsplit:
21-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
21+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ]
22+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], -1
2223
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
2324
; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
24-
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
25+
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ]
2526
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
2627
; CHECK: loop.exit.loopexitsplitsplit:
2728
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
2829
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
2930
; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
30-
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
31+
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ]
3132
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
3233
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
3334
; CHECK: loop.exit.loopexitsplit:
3435
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
3536
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
3637
; CHECK: then.34.loop.exit.loopexit_crit_edge:
37-
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
38+
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ]
3839
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
3940
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
4041
; CHECK: loop.exit.loopexit:
@@ -48,31 +49,31 @@ define i32 @foo(ptr %A, i32 %t) {
4849
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
4950
; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
5051
; CHECK: loop.32:
51-
; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
52+
; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
5253
; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
53-
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
54+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
5455
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
5556
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4
5657
; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4
5758
; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
58-
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
59+
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]]
5960
; CHECK: then.34:
60-
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
61+
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
6162
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
6263
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8
6364
; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4
6465
; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
6566
; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
6667
; CHECK: ifmerge.34:
67-
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
68+
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
6869
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
6970
; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
7071
; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
7172
; CHECK-NEXT: [[CMP_39:%.*]] = icmp slt i32 [[GEPLOAD]], [[T]]
7273
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
7374
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
7475
; CHECK: ifmerge.38:
75-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
76+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
7677
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
7778
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4
7879
; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
@@ -81,7 +82,7 @@ define i32 @foo(ptr %A, i32 %t) {
8182
; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
8283
; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
8384
; CHECK: ifmerge.42:
84-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
85+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
8586
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
8687
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8
8788
; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
@@ -91,7 +92,7 @@ define i32 @foo(ptr %A, i32 %t) {
9192
; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
9293
; CHECK: ifmerge.46:
9394
; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
94-
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
95+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4
9596
; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
9697
; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
9798
; CHECK: loop.25:

llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,22 @@ define i64 @test_duplicated_phis(i64 noundef %N) {
1717
; CHECK-NEXT: br i1 [[TMP0]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
1818
; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
1919
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
20+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UNROLL_ITER]], -4
21+
; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 2
22+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP5]], 1
23+
; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = sub i64 -3, [[TMP3]]
2024
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
2125
; CHECK: [[FOR_BODY]]:
22-
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ -1, %[[FOR_BODY_PREHEADER_NEW]] ]
2326
; CHECK-NEXT: [[I_07:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], %[[FOR_BODY]] ]
2427
; CHECK-NEXT: [[INC_3]] = add i64 [[I_07]], 4
25-
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
2628
; CHECK-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
2729
; CHECK-NEXT: br i1 [[NITER_NCMP_3_NOT]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:.*]], label %[[FOR_BODY]]
2830
; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]]:
29-
; CHECK-NEXT: [[LSR_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], %[[FOR_BODY]] ]
3031
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], 1
3132
; CHECK-NEXT: br label %[[FOR_END_LOOPEXIT_UNR_LCSSA]]
3233
; CHECK: [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
3334
; CHECK-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, %[[FOR_BODY_PREHEADER]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
34-
; CHECK-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, %[[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
35+
; CHECK-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, %[[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT]], %[[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
3536
; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[N]], 1
3637
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP2]], 0
3738
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]

0 commit comments

Comments
 (0)