Skip to content

Commit ebf4d4d

Browse files
committed
[LSR] Do not create duplicated PHI nodes while preserving LCSSA form
1 parent a696451 commit ebf4d4d

File tree

8 files changed

+57
-54
lines changed

8 files changed

+57
-54
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2186,6 +2186,12 @@ class LSRInstance {
21862186
/// Induction variables that were generated and inserted by the SCEV Expander.
21872187
SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;
21882188

2189+
// Inserting instructions in the loop and using them as PHI's input could
2190+
// break LCSSA if the PHI's parent block is not a loop exit (i.e. the
2191+
// corresponding incoming block is not loop exiting). So collect all such
2192+
// instructions to form LCSSA for them later.
2193+
SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;
2194+
21892195
void OptimizeShadowIV();
21902196
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
21912197
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
@@ -2276,9 +2282,9 @@ class LSRInstance {
22762282
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
22772283
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
22782284
const Formula &F,
2279-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2285+
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
22802286
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
2281-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
2287+
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
22822288
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);
22832289

22842290
public:
@@ -5858,17 +5864,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
58585864
/// Helper for Rewrite. PHI nodes are special because the use of their operands
58595865
/// effectively happens in their predecessor blocks, so the expression may need
58605866
/// to be expanded in multiple places.
5861-
void LSRInstance::RewriteForPHI(
5862-
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
5863-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5867+
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
5868+
const LSRFixup &LF, const Formula &F,
5869+
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
58645870
DenseMap<BasicBlock *, Value *> Inserted;
58655871

5866-
// Inserting instructions in the loop and using them as PHI's input could
5867-
// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
5868-
// corresponding incoming block is not loop exiting). So collect all such
5869-
// instructions to form LCSSA for them later.
5870-
SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
5871-
58725872
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
58735873
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
58745874
bool needUpdateFixups = false;
@@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(
59395939
// the inserted value.
59405940
if (auto *I = dyn_cast<Instruction>(FullV))
59415941
if (L->contains(I) && !L->contains(BB))
5942-
InsertedNonLCSSAInsts.push_back(I);
5942+
InsertedNonLCSSAInsts.insert(I);
59435943

59445944
PN->setIncomingValue(i, FullV);
59455945
Pair.first->second = FullV;
@@ -5983,16 +5983,14 @@ void LSRInstance::RewriteForPHI(
59835983
}
59845984
}
59855985
}
5986-
5987-
formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
59885986
}
59895987

59905988
/// Emit instructions for the leading candidate expression for this LSRUse (this
59915989
/// is called "expanding"), and update the UserInst to reference the newly
59925990
/// expanded value.
59935991
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
59945992
const Formula &F,
5995-
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
5993+
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
59965994
// First, find an insertion point that dominates UserInst. For PHI nodes,
59975995
// find the nearest block which dominates all the relevant uses.
59985996
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
@@ -6080,6 +6078,9 @@ void LSRInstance::ImplementSolution(
60806078
Changed = true;
60816079
}
60826080

6081+
auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
6082+
formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);
6083+
60836084
for (const IVChain &Chain : IVChainVec) {
60846085
GenerateIVChain(Chain, DeadInsts);
60856086
Changed = true;

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1604,7 +1604,7 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
16041604
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
16051605
; GFX1232_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
16061606
; GFX1232_DPP-NEXT: s_or_saveexec_b32 s4, -1
1607-
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1607+
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
16081608
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
16091609
; GFX1232_DPP-NEXT: s_wait_alu 0xfffe
16101610
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
@@ -5351,7 +5351,7 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
53515351
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4
53525352
; GFX1232_DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
53535353
; GFX1232_DPP-NEXT: s_or_saveexec_b32 s4, -1
5354-
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
5354+
; GFX1232_DPP-NEXT: s_delay_alu instid0(VALU_DEP_2)
53555355
; GFX1232_DPP-NEXT: v_writelane_b32 v3, s5, 16
53565356
; GFX1232_DPP-NEXT: s_wait_alu 0xfffe
53575357
; GFX1232_DPP-NEXT: s_mov_b32 exec_lo, s4

llvm/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,15 @@ define ptr @test1() {
2424
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
2525
; CHECK: bbA:
2626
; CHECK-NEXT: switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [
27-
; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
28-
; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
27+
; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
28+
; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
2929
; CHECK-NEXT: ]
3030
; CHECK: bbA.bb89_crit_edge:
3131
; CHECK-NEXT: br label [[BB89:%.*]]
3232
; CHECK: bbB:
3333
; CHECK-NEXT: switch i8 0, label [[BBB_BB89_CRIT_EDGE:%.*]] [
34-
; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
35-
; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
34+
; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
35+
; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
3636
; CHECK-NEXT: ]
3737
; CHECK: bbB.bb89_crit_edge:
3838
; CHECK-NEXT: br label [[BB89]]
@@ -85,23 +85,22 @@ define ptr @test2() {
8585
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
8686
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
8787
; CHECK: loopexit:
88-
; CHECK-NEXT: [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
8988
; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
9089
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
9190
; CHECK: bbA:
9291
; CHECK-NEXT: switch i32 0, label [[BB89:%.*]] [
93-
; CHECK-NEXT: i32 47, label [[BB89]]
94-
; CHECK-NEXT: i32 58, label [[BB89]]
92+
; CHECK-NEXT: i32 47, label [[BB89]]
93+
; CHECK-NEXT: i32 58, label [[BB89]]
9594
; CHECK-NEXT: ]
9695
; CHECK: bbB:
9796
; CHECK-NEXT: switch i8 0, label [[BBB_EXIT_CRIT_EDGE:%.*]] [
98-
; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
99-
; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
97+
; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
98+
; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
10099
; CHECK-NEXT: ]
101100
; CHECK: bbB.exit_crit_edge:
102101
; CHECK-NEXT: br label [[EXIT:%.*]]
103102
; CHECK: bb89:
104-
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ]
103+
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ]
105104
; CHECK-NEXT: br label [[EXIT]]
106105
; CHECK: exit:
107106
; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ]

llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ define amdgpu_kernel void @scaledregtest() local_unnamed_addr {
1616
; CHECK-NEXT: entry:
1717
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1818
; CHECK: loopexit:
19-
; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
2019
; CHECK-NEXT: [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ]
20+
; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
2121
; CHECK-NEXT: br label [[FOR_BODY_1:%.*]]
2222
; CHECK: for.body.1:
2323
; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ]

llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,17 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
2020
; CHECK-NEXT: .p2align 4, 0x90
2121
; CHECK-NEXT: .LBB0_1: # %bb1
2222
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
23-
; CHECK-NEXT: movq %rbx, %rcx
2423
; CHECK-NEXT: testb %al, %al
25-
; CHECK-NEXT: je .LBB0_3
24+
; CHECK-NEXT: je .LBB0_4
2625
; CHECK-NEXT: # %bb.2: # %bb4
2726
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
28-
; CHECK-NEXT: leaq 1(%rcx), %rbx
27+
; CHECK-NEXT: incq %rbx
2928
; CHECK-NEXT: testb %al, %al
3029
; CHECK-NEXT: jne .LBB0_1
31-
; CHECK-NEXT: .LBB0_3: # %bb8
32-
; CHECK-NEXT: movq %rcx, %rax
30+
; CHECK-NEXT: # %bb.3: # %bb8split
31+
; CHECK-NEXT: decq %rbx
32+
; CHECK-NEXT: .LBB0_4: # %bb8
33+
; CHECK-NEXT: movq %rbx, %rax
3334
; CHECK-NEXT: popq %rbx
3435
; CHECK-NEXT: retq
3536
bb:

llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ define i64 @blam(ptr %start, ptr %end, ptr %ptr.2) {
2121
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END:%.*]]
2222
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]]
2323
; CHECK: loop.2.ph:
24-
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
2524
; CHECK-NEXT: [[LSR_IV_NEXT5_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT5]], [[LOOP_1_HEADER]] ]
25+
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
2626
; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]]
2727
; CHECK: loop.2.header:
2828
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT5_LCSSA]], [[LOOP_2_PH]] ]

llvm/test/Transforms/LoopStrengthReduce/X86/missing-phi-operand-update.ll

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,24 @@ define i32 @foo(ptr %A, i32 %t) {
1818
; CHECK-NEXT: entry:
1919
; CHECK-NEXT: br label [[LOOP_32:%.*]]
2020
; CHECK: loop.exit.loopexitsplitsplitsplit:
21-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
21+
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ]
22+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], -1
2223
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
2324
; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
24-
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
25+
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ]
2526
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
2627
; CHECK: loop.exit.loopexitsplitsplit:
2728
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
2829
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
2930
; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
30-
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
31+
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ]
3132
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
3233
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
3334
; CHECK: loop.exit.loopexitsplit:
3435
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
3536
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
3637
; CHECK: then.34.loop.exit.loopexit_crit_edge:
37-
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
38+
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ]
3839
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
3940
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
4041
; CHECK: loop.exit.loopexit:
@@ -48,31 +49,31 @@ define i32 @foo(ptr %A, i32 %t) {
4849
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
4950
; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
5051
; CHECK: loop.32:
51-
; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
52+
; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
5253
; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
53-
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
54+
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
5455
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
5556
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4
5657
; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4
5758
; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
58-
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
59+
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]]
5960
; CHECK: then.34:
60-
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
61+
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
6162
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
6263
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8
6364
; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4
6465
; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
6566
; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
6667
; CHECK: ifmerge.34:
67-
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
68+
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
6869
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
6970
; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
7071
; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
7172
; CHECK-NEXT: [[CMP_39:%.*]] = icmp slt i32 [[GEPLOAD]], [[T]]
7273
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
7374
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
7475
; CHECK: ifmerge.38:
75-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
76+
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
7677
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
7778
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4
7879
; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
@@ -81,7 +82,7 @@ define i32 @foo(ptr %A, i32 %t) {
8182
; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
8283
; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
8384
; CHECK: ifmerge.42:
84-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
85+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
8586
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
8687
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8
8788
; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
@@ -91,7 +92,7 @@ define i32 @foo(ptr %A, i32 %t) {
9192
; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
9293
; CHECK: ifmerge.46:
9394
; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
94-
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
95+
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4
9596
; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
9697
; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
9798
; CHECK: loop.25:

llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,23 +102,24 @@ define i64 @test_duplicated_phis(i64 noundef %N) {
102102
; LEGACYPM-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
103103
; LEGACYPM: for.body.preheader.new:
104104
; LEGACYPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
105+
; LEGACYPM-NEXT: [[TMP1:%.*]] = add i64 [[UNROLL_ITER]], -4
106+
; LEGACYPM-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
107+
; LEGACYPM-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 1
108+
; LEGACYPM-NEXT: [[TMP4:%.*]] = sub i64 -3, [[TMP3]]
105109
; LEGACYPM-NEXT: br label [[FOR_BODY:%.*]]
106110
; LEGACYPM: for.body:
107-
; LEGACYPM-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -1, [[FOR_BODY_PREHEADER_NEW]] ]
108111
; LEGACYPM-NEXT: [[I_07:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_BODY]] ]
109112
; LEGACYPM-NEXT: [[INC_3]] = add i64 [[I_07]], 4
110-
; LEGACYPM-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
111113
; LEGACYPM-NEXT: [[NITER_NCMP_3_NOT:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[INC_3]]
112114
; LEGACYPM-NEXT: br i1 [[NITER_NCMP_3_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]]
113115
; LEGACYPM: for.end.loopexit.unr-lcssa.loopexit:
114-
; LEGACYPM-NEXT: [[LSR_IV_NEXT_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT]], [[FOR_BODY]] ]
115-
; LEGACYPM-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_NEXT]], 1
116+
; LEGACYPM-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
116117
; LEGACYPM-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]]
117118
; LEGACYPM: for.end.loopexit.unr-lcssa:
118-
; LEGACYPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP1]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
119-
; LEGACYPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT_LCSSA]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
120-
; LEGACYPM-NEXT: [[TMP2:%.*]] = and i64 [[N]], 1
121-
; LEGACYPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP2]], 0
119+
; LEGACYPM-NEXT: [[RES_1_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TMP5]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
120+
; LEGACYPM-NEXT: [[RES_09_UNR:%.*]] = phi i64 [ -1, [[FOR_BODY_PREHEADER]] ], [ [[TMP4]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
121+
; LEGACYPM-NEXT: [[TMP6:%.*]] = and i64 [[N]], 1
122+
; LEGACYPM-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[TMP6]], 0
122123
; LEGACYPM-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[LCMP_MOD_NOT]], i64 [[RES_1_LCSSA_PH]], i64 [[RES_09_UNR]]
123124
; LEGACYPM-NEXT: br label [[FOR_END]]
124125
; LEGACYPM: for.end:

0 commit comments

Comments
 (0)