Skip to content

Commit e4c029f

Browse files
committed
Reapply "[coro][CoroSplit] Use llvm.lifetime.end to compute putting objects on the frame vs the stack (llvm#90265)"
This reverts commit 9243841. This reland addresses the performance regressions seen in llvm#90265 by retaining the original definition of `isPotentiallyReachableFromMany(...)` instead of reimplementing it with `isManyPotentiallyReachableFromMany(...)`. Fixes llvm#86580
1 parent 873431a commit e4c029f

File tree

4 files changed

+281
-18
lines changed

4 files changed

+281
-18
lines changed

llvm/include/llvm/Analysis/CFG.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ bool isPotentiallyReachableFromMany(
9696
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
9797
const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
9898

99+
/// Determine whether there is a potentially a path from at least one block in
100+
/// 'Worklist' to at least one block in 'StopSet' within a single function
101+
/// without passing through any of the blocks in 'ExclusionSet'. Returns false
102+
/// only if we can prove that once any block in 'Worklist' has been reached then
103+
/// no blocks in 'StopSet' can be executed without passing through any blocks in
104+
/// 'ExclusionSet'. Conservatively returns true.
105+
bool isManyPotentiallyReachableFromMany(
106+
SmallVectorImpl<BasicBlock *> &Worklist,
107+
const SmallPtrSetImpl<const BasicBlock *> &StopSet,
108+
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
109+
const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
110+
99111
/// Return true if the control flow in \p RPOTraversal is irreducible.
100112
///
101113
/// This is a generic implementation to detect CFG irreducibility based on loop

llvm/lib/Analysis/CFG.cpp

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ bool llvm::isPotentiallyReachableFromMany(
158158
const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
159159

160160
unsigned Limit = DefaultMaxBBsToExplore;
161-
SmallPtrSet<const BasicBlock*, 32> Visited;
161+
SmallPtrSet<const BasicBlock *, 32> Visited;
162162
do {
163163
BasicBlock *BB = Worklist.pop_back_val();
164164
if (!Visited.insert(BB).second)
@@ -204,6 +204,89 @@ bool llvm::isPotentiallyReachableFromMany(
204204
return false;
205205
}
206206

207+
bool llvm::isManyPotentiallyReachableFromMany(
208+
SmallVectorImpl<BasicBlock *> &Worklist,
209+
const SmallPtrSetImpl<const BasicBlock *> &StopSet,
210+
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
211+
const LoopInfo *LI) {
212+
// When a stop block is unreachable, it's dominated from everywhere,
213+
// regardless of whether there's a path between the two blocks.
214+
llvm::DenseMap<const BasicBlock *, bool> StopBBReachable;
215+
for (auto *BB : StopSet)
216+
StopBBReachable[BB] = DT && DT->isReachableFromEntry(BB);
217+
218+
// We can't skip directly from a block that dominates the stop block if the
219+
// exclusion block is potentially in between.
220+
if (ExclusionSet && !ExclusionSet->empty())
221+
DT = nullptr;
222+
223+
// Normally any block in a loop is reachable from any other block in a loop,
224+
// however excluded blocks might partition the body of a loop to make that
225+
// untrue.
226+
SmallPtrSet<const Loop *, 8> LoopsWithHoles;
227+
if (LI && ExclusionSet) {
228+
for (auto *BB : *ExclusionSet) {
229+
if (const Loop *L = getOutermostLoop(LI, BB))
230+
LoopsWithHoles.insert(L);
231+
}
232+
}
233+
234+
llvm::DenseMap<const BasicBlock *, const Loop *> StopLoops;
235+
for (auto *StopBB : StopSet)
236+
StopLoops[StopBB] = LI ? getOutermostLoop(LI, StopBB) : nullptr;
237+
238+
unsigned Limit = DefaultMaxBBsToExplore;
239+
SmallPtrSet<const BasicBlock*, 32> Visited;
240+
do {
241+
BasicBlock *BB = Worklist.pop_back_val();
242+
if (!Visited.insert(BB).second)
243+
continue;
244+
if (StopSet.contains(BB))
245+
return true;
246+
if (ExclusionSet && ExclusionSet->count(BB))
247+
continue;
248+
if (DT && llvm::any_of(StopSet, [&](const BasicBlock *StopBB) {
249+
return StopBBReachable[BB] && DT->dominates(BB, StopBB);
250+
}))
251+
return true;
252+
253+
const Loop *Outer = nullptr;
254+
if (LI) {
255+
Outer = getOutermostLoop(LI, BB);
256+
// If we're in a loop with a hole, not all blocks in the loop are
257+
// reachable from all other blocks. That implies we can't simply jump to
258+
// the loop's exit blocks, as that exit might need to pass through an
259+
// excluded block. Clear Outer so we process BB's successors.
260+
if (LoopsWithHoles.count(Outer))
261+
Outer = nullptr;
262+
if (llvm::any_of(StopSet, [&](const BasicBlock *StopBB) {
263+
const Loop *StopLoop = StopLoops[StopBB];
264+
return StopLoop && StopLoop == Outer;
265+
}))
266+
return true;
267+
}
268+
269+
if (!--Limit) {
270+
// We haven't been able to prove it one way or the other. Conservatively
271+
// answer true -- that there is potentially a path.
272+
return true;
273+
}
274+
275+
if (Outer) {
276+
// All blocks in a single loop are reachable from all other blocks. From
277+
// any of these blocks, we can skip directly to the exits of the loop,
278+
// ignoring any other blocks inside the loop body.
279+
Outer->getExitBlocks(Worklist);
280+
} else {
281+
Worklist.append(succ_begin(BB), succ_end(BB));
282+
}
283+
} while (!Worklist.empty());
284+
285+
// We have exhausted all possible paths and are certain that 'To' can not be
286+
// reached from 'From'.
287+
return false;
288+
}
289+
207290
bool llvm::isPotentiallyReachable(
208291
const BasicBlock *A, const BasicBlock *B,
209292
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/PostOrderIterator.h"
2020
#include "llvm/ADT/ScopeExit.h"
2121
#include "llvm/ADT/SmallString.h"
22+
#include "llvm/Analysis/CFG.h"
2223
#include "llvm/Analysis/PtrUseVisitor.h"
2324
#include "llvm/Analysis/StackLifetime.h"
2425
#include "llvm/Config/llvm-config.h"
@@ -1440,17 +1441,22 @@ namespace {
14401441
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
14411442
using Base = PtrUseVisitor<AllocaUseVisitor>;
14421443
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
1443-
const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
1444+
const coro::Shape &CoroShape,
1445+
const SuspendCrossingInfo &Checker,
14441446
bool ShouldUseLifetimeStartInfo)
1445-
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
1446-
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
1447+
: PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker),
1448+
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {
1449+
for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends)
1450+
CoroSuspendBBs.insert(SuspendInst->getParent());
1451+
}
14471452

14481453
void visit(Instruction &I) {
14491454
Users.insert(&I);
14501455
Base::visit(I);
14511456
// If the pointer is escaped prior to CoroBegin, we have to assume it would
14521457
// be written into before CoroBegin as well.
1453-
if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
1458+
if (PI.isEscaped() &&
1459+
!DT.dominates(CoroShape.CoroBegin, PI.getEscapingInst())) {
14541460
MayWriteBeforeCoroBegin = true;
14551461
}
14561462
}
@@ -1553,10 +1559,19 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
15531559
// When we found the lifetime markers refers to a
15541560
// subrange of the original alloca, ignore the lifetime
15551561
// markers to avoid misleading the analysis.
1556-
if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
1557-
!Offset.isZero())
1562+
if (!IsOffsetKnown || !Offset.isZero())
1563+
return Base::visitIntrinsicInst(II);
1564+
switch (II.getIntrinsicID()) {
1565+
default:
15581566
return Base::visitIntrinsicInst(II);
1559-
LifetimeStarts.insert(&II);
1567+
case Intrinsic::lifetime_start:
1568+
LifetimeStarts.insert(&II);
1569+
LifetimeStartBBs.push_back(II.getParent());
1570+
break;
1571+
case Intrinsic::lifetime_end:
1572+
LifetimeEndBBs.insert(II.getParent());
1573+
break;
1574+
}
15601575
}
15611576

15621577
void visitCallBase(CallBase &CB) {
@@ -1586,14 +1601,17 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
15861601

15871602
private:
15881603
const DominatorTree &DT;
1589-
const CoroBeginInst &CoroBegin;
1604+
const coro::Shape &CoroShape;
15901605
const SuspendCrossingInfo &Checker;
15911606
// All alias to the original AllocaInst, created before CoroBegin and used
15921607
// after CoroBegin. Each entry contains the instruction and the offset in the
15931608
// original Alloca. They need to be recreated after CoroBegin off the frame.
15941609
DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
15951610
SmallPtrSet<Instruction *, 4> Users{};
15961611
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1612+
SmallVector<BasicBlock *> LifetimeStartBBs{};
1613+
SmallPtrSet<BasicBlock *, 2> LifetimeEndBBs{};
1614+
SmallPtrSet<const BasicBlock *, 2> CoroSuspendBBs{};
15971615
bool MayWriteBeforeCoroBegin{false};
15981616
bool ShouldUseLifetimeStartInfo{true};
15991617

@@ -1605,10 +1623,19 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
16051623
// every basic block that uses the pointer to see if they cross suspension
16061624
// points. The uses cover both direct uses as well as indirect uses.
16071625
if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
1608-
for (auto *I : Users)
1609-
for (auto *S : LifetimeStarts)
1610-
if (Checker.isDefinitionAcrossSuspend(*S, I))
1611-
return true;
1626+
// If there is no explicit lifetime.end, then assume the address can
1627+
// cross suspension points.
1628+
if (LifetimeEndBBs.empty())
1629+
return true;
1630+
1631+
// If there is a path from a lifetime.start to a suspend without a
1632+
// corresponding lifetime.end, then the alloca's lifetime persists
1633+
// beyond that suspension point and the alloca must go on the frame.
1634+
llvm::SmallVector<BasicBlock *> Worklist(LifetimeStartBBs);
1635+
if (isManyPotentiallyReachableFromMany(Worklist, CoroSuspendBBs,
1636+
&LifetimeEndBBs, &DT))
1637+
return true;
1638+
16121639
// Addresses are guaranteed to be identical after every lifetime.start so
16131640
// we cannot use the local stack if the address escaped and there is a
16141641
// suspend point between lifetime markers. This should also cover the
@@ -1646,13 +1673,13 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
16461673
}
16471674

16481675
void handleMayWrite(const Instruction &I) {
1649-
if (!DT.dominates(&CoroBegin, &I))
1676+
if (!DT.dominates(CoroShape.CoroBegin, &I))
16501677
MayWriteBeforeCoroBegin = true;
16511678
}
16521679

16531680
bool usedAfterCoroBegin(Instruction &I) {
16541681
for (auto &U : I.uses())
1655-
if (DT.dominates(&CoroBegin, U))
1682+
if (DT.dominates(CoroShape.CoroBegin, U))
16561683
return true;
16571684
return false;
16581685
}
@@ -1661,7 +1688,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
16611688
// We track all aliases created prior to CoroBegin but used after.
16621689
// These aliases may need to be recreated after CoroBegin if the alloca
16631690
// need to live on the frame.
1664-
if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
1691+
if (DT.dominates(CoroShape.CoroBegin, &I) || !usedAfterCoroBegin(I))
16651692
return;
16661693

16671694
if (!IsOffsetKnown) {
@@ -2830,8 +2857,7 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
28302857
bool ShouldUseLifetimeStartInfo =
28312858
(Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
28322859
Shape.ABI != coro::ABI::RetconOnce);
2833-
AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
2834-
*Shape.CoroBegin, Checker,
2860+
AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT, Shape, Checker,
28352861
ShouldUseLifetimeStartInfo};
28362862
Visitor.visitPtr(*AI);
28372863
if (!Visitor.getShouldLiveOnFrame())
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
3+
4+
declare ptr @malloc(i64)
5+
6+
%i8.array = type { [100 x i8] }
7+
declare void @consume.i8.array(ptr)
8+
9+
@testbool = external local_unnamed_addr global i8, align 1
10+
11+
; testval does not contain an explicit lifetime end. We must assume that it may
12+
; live across suspension.
13+
define void @HasNoLifetimeEnd() presplitcoroutine {
14+
; CHECK-LABEL: define void @HasNoLifetimeEnd() {
15+
; CHECK-NEXT: entry:
16+
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @HasNoLifetimeEnd.resumers)
17+
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
18+
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
19+
; CHECK-NEXT: store ptr @HasNoLifetimeEnd.resume, ptr [[VFRAME]], align 8
20+
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
21+
; CHECK-NEXT: store ptr @HasNoLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
22+
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
23+
; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
24+
; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
25+
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
26+
; CHECK-NEXT: ret void
27+
;
28+
entry:
29+
%testval = alloca %i8.array
30+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
31+
%alloc = call ptr @malloc(i64 16) #3
32+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
33+
34+
call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
35+
call void @consume.i8.array(ptr %testval)
36+
37+
%save = call token @llvm.coro.save(ptr null)
38+
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
39+
switch i8 %suspend, label %exit [
40+
i8 0, label %await.ready
41+
i8 1, label %exit
42+
]
43+
await.ready:
44+
br label %exit
45+
exit:
46+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
47+
ret void
48+
}
49+
50+
define void @LifetimeEndAfterCoroEnd() presplitcoroutine {
51+
; CHECK-LABEL: define void @LifetimeEndAfterCoroEnd() {
52+
; CHECK-NEXT: entry:
53+
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @LifetimeEndAfterCoroEnd.resumers)
54+
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
55+
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
56+
; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.resume, ptr [[VFRAME]], align 8
57+
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
58+
; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.destroy, ptr [[DESTROY_ADDR]], align 8
59+
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
60+
; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
61+
; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
62+
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
63+
; CHECK-NEXT: ret void
64+
;
65+
entry:
66+
%testval = alloca %i8.array
67+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
68+
%alloc = call ptr @malloc(i64 16) #3
69+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
70+
71+
call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
72+
call void @consume.i8.array(ptr %testval)
73+
74+
%save = call token @llvm.coro.save(ptr null)
75+
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
76+
switch i8 %suspend, label %exit [
77+
i8 0, label %await.ready
78+
i8 1, label %exit
79+
]
80+
await.ready:
81+
br label %exit
82+
exit:
83+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
84+
call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
85+
ret void
86+
}
87+
88+
define void @BranchWithoutLifetimeEnd() presplitcoroutine {
89+
; CHECK-LABEL: define void @BranchWithoutLifetimeEnd() {
90+
; CHECK-NEXT: entry:
91+
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @BranchWithoutLifetimeEnd.resumers)
92+
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
93+
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
94+
; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.resume, ptr [[VFRAME]], align 8
95+
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
96+
; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
97+
; CHECK-NEXT: [[TESTVAL:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
98+
; CHECK-NEXT: call void @consume.i8.array(ptr [[TESTVAL]])
99+
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @testbool, align 1
100+
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
101+
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR1]], align 1
102+
; CHECK-NEXT: ret void
103+
;
104+
entry:
105+
%testval = alloca %i8.array
106+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
107+
%alloc = call ptr @malloc(i64 16) #3
108+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
109+
110+
call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
111+
call void @consume.i8.array(ptr %testval)
112+
113+
%0 = load i8, ptr @testbool, align 1
114+
%tobool = trunc nuw i8 %0 to i1
115+
br i1 %tobool, label %if.then, label %if.end
116+
117+
if.then:
118+
call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
119+
br label %if.end
120+
121+
if.end:
122+
%save = call token @llvm.coro.save(ptr null)
123+
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
124+
switch i8 %suspend, label %exit [
125+
i8 0, label %await.ready
126+
i8 1, label %exit
127+
]
128+
await.ready:
129+
br label %exit
130+
exit:
131+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
132+
ret void
133+
}
134+
135+
136+
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr)
137+
declare ptr @llvm.coro.begin(token, ptr writeonly) #3
138+
declare ptr @llvm.coro.frame() #5
139+
declare i8 @llvm.coro.suspend(token, i1) #3
140+
declare i1 @llvm.coro.end(ptr, i1, token) #3
141+
declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #4
142+
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #4

0 commit comments

Comments
 (0)