Skip to content

[LSR] Do not create duplicated PHI nodes while preserving LCSSA form #107380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2186,6 +2186,12 @@ class LSRInstance {
/// Induction variables that were generated and inserted by the SCEV Expander.
SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

// Inserting instructions in the loop and using them as a PHI's input can
// break LCSSA if the PHI's parent block is not a loop exit (i.e. the
// corresponding incoming block is not loop exiting). Collect all such
// instructions so that LCSSA can be formed for them later.
SmallSetVector<Instruction *, 4> InsertedNonLCSSAInsts;

void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
Expand Down Expand Up @@ -2276,9 +2282,9 @@ class LSRInstance {
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
SmallVectorImpl<WeakTrackingVH> &DeadInsts);
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

public:
Expand Down Expand Up @@ -5858,17 +5864,11 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
/// Helper for Rewrite. PHI nodes are special because the use of their operands
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU,
const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
DenseMap<BasicBlock *, Value *> Inserted;

// Inserting instructions in the loop and using them as PHI's input could
// break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
// corresponding incoming block is not loop exiting). So collect all such
// instructions to form LCSSA for them later.
SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;

for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
bool needUpdateFixups = false;
Expand Down Expand Up @@ -5939,7 +5939,7 @@ void LSRInstance::RewriteForPHI(
// the inserted value.
if (auto *I = dyn_cast<Instruction>(FullV))
if (L->contains(I) && !L->contains(BB))
InsertedNonLCSSAInsts.push_back(I);
InsertedNonLCSSAInsts.insert(I);

PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
Expand Down Expand Up @@ -5983,16 +5983,14 @@ void LSRInstance::RewriteForPHI(
}
}
}

formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
}

/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
Expand Down Expand Up @@ -6080,6 +6078,9 @@ void LSRInstance::ImplementSolution(
Changed = true;
}

auto InsertedInsts = InsertedNonLCSSAInsts.takeVector();
formLCSSAForInstructions(InsertedInsts, DT, LI, &SE);

for (const IVChain &Chain : IVChainVec) {
GenerateIVChain(Chain, DeadInsts);
Changed = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ define ptr @test1() {
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
; CHECK: bbA:
; CHECK-NEXT: switch i32 0, label [[BBA_BB89_CRIT_EDGE:%.*]] [
; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
; CHECK-NEXT: i32 47, label [[BBA_BB89_CRIT_EDGE]]
; CHECK-NEXT: i32 58, label [[BBA_BB89_CRIT_EDGE]]
; CHECK-NEXT: ]
; CHECK: bbA.bb89_crit_edge:
; CHECK-NEXT: br label [[BB89:%.*]]
; CHECK: bbB:
; CHECK-NEXT: switch i8 0, label [[BBB_BB89_CRIT_EDGE:%.*]] [
; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
; CHECK-NEXT: i8 47, label [[BBB_BB89_CRIT_EDGE]]
; CHECK-NEXT: i8 58, label [[BBB_BB89_CRIT_EDGE]]
; CHECK-NEXT: ]
; CHECK: bbB.bb89_crit_edge:
; CHECK-NEXT: br label [[BB89]]
Expand Down Expand Up @@ -85,23 +85,22 @@ define ptr @test2() {
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV]], i64 1
; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LOOPEXIT:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: [[SCEVGEP_LCSSA1:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
; CHECK-NEXT: [[SCEVGEP_LCSSA:%.*]] = phi ptr [ [[SCEVGEP]], [[LOOP]] ]
; CHECK-NEXT: br i1 false, label [[BBA:%.*]], label [[BBB:%.*]]
; CHECK: bbA:
; CHECK-NEXT: switch i32 0, label [[BB89:%.*]] [
; CHECK-NEXT: i32 47, label [[BB89]]
; CHECK-NEXT: i32 58, label [[BB89]]
; CHECK-NEXT: i32 47, label [[BB89]]
; CHECK-NEXT: i32 58, label [[BB89]]
; CHECK-NEXT: ]
; CHECK: bbB:
; CHECK-NEXT: switch i8 0, label [[BBB_EXIT_CRIT_EDGE:%.*]] [
; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
; CHECK-NEXT: i8 47, label [[BBB_EXIT_CRIT_EDGE]]
; CHECK-NEXT: i8 58, label [[BBB_EXIT_CRIT_EDGE]]
; CHECK-NEXT: ]
; CHECK: bbB.exit_crit_edge:
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: bb89:
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ], [ [[SCEVGEP_LCSSA1]], [[BBA]] ]
; CHECK-NEXT: [[TMP75PHI:%.*]] = phi ptr [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ], [ [[SCEVGEP_LCSSA]], [[BBA]] ]
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: [[RESULT:%.*]] = phi ptr [ [[TMP75PHI]], [[BB89]] ], [ [[SCEVGEP_LCSSA]], [[BBB_EXIT_CRIT_EDGE]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ define amdgpu_kernel void @scaledregtest() local_unnamed_addr {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: loopexit:
; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SCEVGEP11_LCSSA:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP11:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SCEVGEP13_LCSSA:%.*]] = phi ptr [ [[SCEVGEP13:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: br label [[FOR_BODY_1:%.*]]
; CHECK: for.body.1:
; CHECK-NEXT: [[LSR_IV5:%.*]] = phi ptr addrspace(5) [ [[SCEVGEP6:%.*]], [[FOR_BODY_1]] ], [ [[SCEVGEP11_LCSSA]], [[LOOPEXIT:%.*]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,17 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rbx, %rcx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.2: # %bb4
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: leaq 1(%rcx), %rbx
; CHECK-NEXT: incq %rbx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: .LBB0_3: # %bb8
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: # %bb.3: # %bb8split
; CHECK-NEXT: decq %rbx
; CHECK-NEXT: .LBB0_4: # %bb8
; CHECK-NEXT: movq %rbx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
bb:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ define i64 @blam(ptr %start, ptr %end, ptr %ptr.2) {
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV_NEXT]], [[END:%.*]]
; CHECK-NEXT: br i1 [[EC]], label [[LOOP_2_PH:%.*]], label [[LOOP_1_HEADER]]
; CHECK: loop.2.ph:
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
; CHECK-NEXT: [[LSR_IV_NEXT5_LCSSA:%.*]] = phi i64 [ [[LSR_IV_NEXT5]], [[LOOP_1_HEADER]] ]
; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi ptr [ [[IV_NEXT]], [[LOOP_1_HEADER]] ]
; CHECK-NEXT: br label [[LOOP_2_HEADER:%.*]]
; CHECK: loop.2.header:
; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[LOOP_2_LATCH:%.*]] ], [ [[LSR_IV_NEXT5_LCSSA]], [[LOOP_2_PH]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,24 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP_32:%.*]]
; CHECK: loop.exit.loopexitsplitsplitsplit:
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV:%.*]], -1
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV1:%.*]], [[IFMERGE_34:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT:%.*]]
; CHECK: ifmerge.38.loop.exit.loopexitsplitsplit_crit_edge:
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_38:%.*]] ]
; CHECK-NEXT: [[LSR_IV_LCSSA10:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_38:%.*]] ]
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLITSPLIT]]
; CHECK: loop.exit.loopexitsplitsplit:
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH_PH:%.*]] = phi i64 [ [[LSR_IV_LCSSA10]], [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE:%.*]] ], [ [[TMP0]], [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT:%.*]] ]
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT:%.*]]
; CHECK: ifmerge.42.loop.exit.loopexitsplit_crit_edge:
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV]], [[IFMERGE_42:%.*]] ]
; CHECK-NEXT: [[LSR_IV_LCSSA11:%.*]] = phi i64 [ [[LSR_IV1]], [[IFMERGE_42:%.*]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[LSR_IV_LCSSA11]], 1
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXITSPLIT]]
; CHECK: loop.exit.loopexitsplit:
; CHECK-NEXT: [[INDVARS_IV_LCSSA_PH_PH:%.*]] = phi i64 [ [[TMP1]], [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE:%.*]] ], [ [[INDVARS_IV_LCSSA_PH_PH_PH]], [[LOOP_EXIT_LOOPEXITSPLITSPLIT]] ]
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT:%.*]]
; CHECK: then.34.loop.exit.loopexit_crit_edge:
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV]], [[THEN_34:%.*]] ]
; CHECK-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i64 [ [[LSR_IV1]], [[THEN_34:%.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[LSR_IV_LCSSA]], -2
; CHECK-NEXT: br label [[LOOP_EXIT_LOOPEXIT]]
; CHECK: loop.exit.loopexit:
Expand All @@ -48,31 +49,31 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: [[I_0_LCSSA:%.*]] = phi i32 [ [[TMP]], [[LOOP_EXIT]] ], [ 50, [[THEN_8_1]] ], [ 50, [[IFMERGE_8:%.*]] ]
; CHECK-NEXT: ret i32 [[I_0_LCSSA]]
; CHECK: loop.32:
; CHECK-NEXT: [[LSR_IV]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[LSR_IV1]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[IFMERGE_46:%.*]] ], [ 2, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I1_I64_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[NEXTIVLOOP_32:%.*]], [[IFMERGE_46]] ]
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, ptr [[SCEVGEP7]], i64 -4
; CHECK-NEXT: [[GEPLOAD:%.*]] = load i32, ptr [[SCEVGEP8]], align 4
; CHECK-NEXT: [[CMP_34:%.*]] = icmp sgt i32 [[GEPLOAD]], [[T]]
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34:%.*]]
; CHECK-NEXT: br i1 [[CMP_34]], label [[THEN_34]], label [[IFMERGE_34]]
; CHECK: then.34:
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[SCEVGEP5]], i64 -8
; CHECK-NEXT: [[GEPLOAD18:%.*]] = load i32, ptr [[SCEVGEP6]], align 4
; CHECK-NEXT: [[CMP_35:%.*]] = icmp slt i32 [[GEPLOAD18]], [[T]]
; CHECK-NEXT: br i1 [[CMP_35]], label [[THEN_34_LOOP_EXIT_LOOPEXIT_CRIT_EDGE]], label [[IFMERGE_34]]
; CHECK: ifmerge.34:
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[GEPLOAD20:%.*]] = load i32, ptr [[SCEVGEP4]], align 4
; CHECK-NEXT: [[CMP_38:%.*]] = icmp sgt i32 [[GEPLOAD20]], [[T]]
; CHECK-NEXT: [[CMP_39:%.*]] = icmp slt i32 [[GEPLOAD]], [[T]]
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP_38]], [[CMP_39]]
; CHECK-NEXT: br i1 [[OR_COND]], label [[LOOP_EXIT_LOOPEXITSPLITSPLITSPLIT]], label [[IFMERGE_38]]
; CHECK: ifmerge.38:
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SCEVGEP2]], i64 4
; CHECK-NEXT: [[GEPLOAD24:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
Expand All @@ -81,7 +82,7 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: [[OR_COND55:%.*]] = and i1 [[CMP_42]], [[CMP_43]]
; CHECK-NEXT: br i1 [[OR_COND55]], label [[IFMERGE_38_LOOP_EXIT_LOOPEXITSPLITSPLIT_CRIT_EDGE]], label [[IFMERGE_42]]
; CHECK: ifmerge.42:
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV]], 2
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[LSR_IV1]], 2
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 8
; CHECK-NEXT: [[GEPLOAD28:%.*]] = load i32, ptr [[SCEVGEP1]], align 4
Expand All @@ -91,7 +92,7 @@ define i32 @foo(ptr %A, i32 %t) {
; CHECK-NEXT: br i1 [[OR_COND56]], label [[IFMERGE_42_LOOP_EXIT_LOOPEXITSPLIT_CRIT_EDGE]], label [[IFMERGE_46]]
; CHECK: ifmerge.46:
; CHECK-NEXT: [[NEXTIVLOOP_32]] = add nuw nsw i64 [[I1_I64_0]], 1
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 4
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV1]], 4
; CHECK-NEXT: [[CONDLOOP_32:%.*]] = icmp ult i64 [[NEXTIVLOOP_32]], 12
; CHECK-NEXT: br i1 [[CONDLOOP_32]], label [[LOOP_32]], label [[LOOP_25:%.*]]
; CHECK: loop.25:
Expand Down
Loading
Loading