Skip to content

[coro][CoroSplit] Use llvm.lifetime.end to compute putting objects on the frame vs the stack #90265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions llvm/include/llvm/Analysis/CFG.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ bool isPotentiallyReachableFromMany(
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);

/// Determine whether there is potentially a path from at least one block in
/// 'Worklist' to at least one block in 'StopSet' within a single function
/// without passing through any of the blocks in 'ExclusionSet'. Returns false
/// only if we can prove that once any block in 'Worklist' has been reached then
/// no blocks in 'StopSet' can be executed without passing through any blocks in
/// 'ExclusionSet'. Conservatively returns true.
///
/// 'Worklist' is consumed by the search (blocks are popped from it as they are
/// visited), so its contents are not preserved across the call. 'ExclusionSet'
/// may be null when no blocks are excluded. 'DT' and 'LI' are optional and
/// default to null.
bool isManyPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist,
const SmallPtrSetImpl<const BasicBlock *> &StopSet,
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);

/// Return true if the control flow in \p RPOTraversal is irreducible.
///
/// This is a generic implementation to detect CFG irreducibility based on loop
Expand Down
32 changes: 25 additions & 7 deletions llvm/lib/Analysis/CFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,21 @@ bool llvm::isPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist, const BasicBlock *StopBB,
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
const LoopInfo *LI) {
// When the stop block is unreachable, it's dominated from everywhere,
return isManyPotentiallyReachableFromMany(
Worklist, llvm::SmallPtrSet<const BasicBlock *, 1>{StopBB}, ExclusionSet,
DT, LI);
}

bool llvm::isManyPotentiallyReachableFromMany(
SmallVectorImpl<BasicBlock *> &Worklist,
const SmallPtrSetImpl<const BasicBlock *> &StopSet,
const SmallPtrSetImpl<BasicBlock *> *ExclusionSet, const DominatorTree *DT,
const LoopInfo *LI) {
// When a stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
DT = nullptr;
llvm::DenseMap<const BasicBlock *, bool> StopBBReachable;
for (auto *BB : StopSet)
StopBBReachable[BB] = DT && DT->isReachableFromEntry(BB);

// We can't skip directly from a block that dominates the stop block if the
// exclusion block is potentially in between.
Expand All @@ -155,19 +166,23 @@ bool llvm::isPotentiallyReachableFromMany(
}
}

const Loop *StopLoop = LI ? getOutermostLoop(LI, StopBB) : nullptr;
llvm::DenseMap<const BasicBlock *, const Loop *> StopLoops;
for (auto *StopBB : StopSet)
StopLoops[StopBB] = LI ? getOutermostLoop(LI, StopBB) : nullptr;

unsigned Limit = DefaultMaxBBsToExplore;
SmallPtrSet<const BasicBlock*, 32> Visited;
do {
BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB).second)
continue;
if (BB == StopBB)
if (StopSet.contains(BB))
return true;
if (ExclusionSet && ExclusionSet->count(BB))
continue;
if (DT && DT->dominates(BB, StopBB))
// Dominator shortcut: BB dominating a stop block only implies a path when
// that stop block is itself reachable from entry (an unreachable block is
// dominated from everywhere). The flag must be looked up by StopBB: the map
// is keyed by stop blocks only, and BB is never a member of StopSet here, so
// indexing by BB would default-insert `false` and disable this shortcut.
if (DT && llvm::any_of(StopSet, [&](const BasicBlock *StopBB) {
return StopBBReachable[StopBB] && DT->dominates(BB, StopBB);
}))
return true;

const Loop *Outer = nullptr;
Expand All @@ -179,7 +194,10 @@ bool llvm::isPotentiallyReachableFromMany(
// excluded block. Clear Outer so we process BB's successors.
if (LoopsWithHoles.count(Outer))
Outer = nullptr;
if (StopLoop && Outer == StopLoop)
if (llvm::any_of(StopSet, [&](const BasicBlock *StopBB) {
const Loop *StopLoop = StopLoops[StopBB];
return StopLoop && StopLoop == Outer;
}))
return true;
}

Expand Down
60 changes: 43 additions & 17 deletions llvm/lib/Transforms/Coroutines/CoroFrame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
Expand Down Expand Up @@ -1440,17 +1441,22 @@ namespace {
struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
using Base = PtrUseVisitor<AllocaUseVisitor>;
AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
const coro::Shape &CoroShape,
const SuspendCrossingInfo &Checker,
bool ShouldUseLifetimeStartInfo)
: PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
: PtrUseVisitor(DL), DT(DT), CoroShape(CoroShape), Checker(Checker),
ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {
for (AnyCoroSuspendInst *SuspendInst : CoroShape.CoroSuspends)
CoroSuspendBBs.insert(SuspendInst->getParent());
}

void visit(Instruction &I) {
Users.insert(&I);
Base::visit(I);
// If the pointer is escaped prior to CoroBegin, we have to assume it would
// be written into before CoroBegin as well.
if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
if (PI.isEscaped() &&
!DT.dominates(CoroShape.CoroBegin, PI.getEscapingInst())) {
MayWriteBeforeCoroBegin = true;
}
}
Expand Down Expand Up @@ -1553,10 +1559,19 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// When we find that the lifetime markers refer to a
// subrange of the original alloca, ignore the lifetime
// markers to avoid misleading the analysis.
if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
!Offset.isZero())
if (!IsOffsetKnown || !Offset.isZero())
return Base::visitIntrinsicInst(II);
switch (II.getIntrinsicID()) {
default:
return Base::visitIntrinsicInst(II);
LifetimeStarts.insert(&II);
case Intrinsic::lifetime_start:
LifetimeStarts.insert(&II);
LifetimeStartBBs.push_back(II.getParent());
break;
case Intrinsic::lifetime_end:
LifetimeEndBBs.insert(II.getParent());
break;
}
}

void visitCallBase(CallBase &CB) {
Expand Down Expand Up @@ -1586,14 +1601,17 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {

private:
const DominatorTree &DT;
const CoroBeginInst &CoroBegin;
const coro::Shape &CoroShape;
const SuspendCrossingInfo &Checker;
// All aliases to the original AllocaInst, created before CoroBegin and used
// after CoroBegin. Each entry contains the instruction and the offset in the
// original Alloca. They need to be recreated after CoroBegin off the frame.
DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
SmallPtrSet<Instruction *, 4> Users{};
SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
SmallVector<BasicBlock *> LifetimeStartBBs{};
SmallPtrSet<BasicBlock *, 2> LifetimeEndBBs{};
SmallPtrSet<const BasicBlock *, 2> CoroSuspendBBs{};
bool MayWriteBeforeCoroBegin{false};
bool ShouldUseLifetimeStartInfo{true};

Expand All @@ -1605,10 +1623,19 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// every basic block that uses the pointer to see if they cross suspension
// points. The uses cover both direct uses as well as indirect uses.
if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
for (auto *I : Users)
for (auto *S : LifetimeStarts)
if (Checker.isDefinitionAcrossSuspend(*S, I))
return true;
// If there is no explicit lifetime.end, then assume the address can
// cross suspension points.
if (LifetimeEndBBs.empty())
return true;

// If there is a path from a lifetime.start to a suspend without a
// corresponding lifetime.end, then the alloca's lifetime persists
// beyond that suspension point and the alloca must go on the frame.
llvm::SmallVector<BasicBlock *> Worklist(LifetimeStartBBs);
if (isManyPotentiallyReachableFromMany(Worklist, CoroSuspendBBs,
&LifetimeEndBBs, &DT))
return true;

// Addresses are guaranteed to be identical after every lifetime.start so
// we cannot use the local stack if the address escaped and there is a
// suspend point between lifetime markers. This should also cover the
Expand Down Expand Up @@ -1646,13 +1673,13 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
}

void handleMayWrite(const Instruction &I) {
if (!DT.dominates(&CoroBegin, &I))
if (!DT.dominates(CoroShape.CoroBegin, &I))
MayWriteBeforeCoroBegin = true;
}

bool usedAfterCoroBegin(Instruction &I) {
for (auto &U : I.uses())
if (DT.dominates(&CoroBegin, U))
if (DT.dominates(CoroShape.CoroBegin, U))
return true;
return false;
}
Expand All @@ -1661,7 +1688,7 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
// We track all aliases created prior to CoroBegin but used after.
// These aliases may need to be recreated after CoroBegin if the alloca
// needs to live on the frame.
if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
if (DT.dominates(CoroShape.CoroBegin, &I) || !usedAfterCoroBegin(I))
return;

if (!IsOffsetKnown) {
Expand Down Expand Up @@ -2830,8 +2857,7 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
bool ShouldUseLifetimeStartInfo =
(Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
Shape.ABI != coro::ABI::RetconOnce);
AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
*Shape.CoroBegin, Checker,
AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT, Shape, Checker,
ShouldUseLifetimeStartInfo};
Visitor.visitPtr(*AI);
if (!Visitor.getShouldLiveOnFrame())
Expand Down
142 changes: 142 additions & 0 deletions llvm/test/Transforms/Coroutines/coro-lifetime-end.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare ptr @malloc(i64)

%i8.array = type { [100 x i8] }
declare void @consume.i8.array(ptr)

@testbool = external local_unnamed_addr global i8, align 1

; %testval has a lifetime.start but no explicit lifetime.end. We must assume
; that it may live across the suspension point.
define void @HasNoLifetimeEnd() presplitcoroutine {
; CHECK-LABEL: define void @HasNoLifetimeEnd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @HasNoLifetimeEnd.resumers)
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
; CHECK-NEXT: store ptr @HasNoLifetimeEnd.resume, ptr [[VFRAME]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
; CHECK-NEXT: store ptr @HasNoLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[HASNOLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
; CHECK-NEXT: ret void
;
entry:
%testval = alloca %i8.array
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
call void @consume.i8.array(ptr %testval)

%save = call token @llvm.coro.save(ptr null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}

; The only lifetime.end for %testval is in the exit block, after the suspend
; and after coro.end, so the lifetime is still open at the suspension point.
; The expected output passes a coroutine-frame field to @consume.i8.array,
; i.e. %testval is placed on the frame.
define void @LifetimeEndAfterCoroEnd() presplitcoroutine {
; CHECK-LABEL: define void @LifetimeEndAfterCoroEnd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @LifetimeEndAfterCoroEnd.resumers)
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.resume, ptr [[VFRAME]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
; CHECK-NEXT: store ptr @LifetimeEndAfterCoroEnd.destroy, ptr [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
; CHECK-NEXT: call void @consume.i8.array(ptr [[INDEX_ADDR1]])
; CHECK-NEXT: [[INDEX_ADDR2:%.*]] = getelementptr inbounds [[LIFETIMEENDAFTERCOROEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR2]], align 1
; CHECK-NEXT: ret void
;
entry:
%testval = alloca %i8.array
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
call void @consume.i8.array(ptr %testval)

%save = call token @llvm.coro.save(ptr null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
ret void
}

; lifetime.end for %testval is only executed on the if.then path. The direct
; edge from entry to if.end reaches the suspend without passing a
; lifetime.end, so the lifetime may still be open at the suspension point.
; The expected output passes a coroutine-frame field to @consume.i8.array,
; i.e. %testval is placed on the frame.
define void @BranchWithoutLifetimeEnd() presplitcoroutine {
; CHECK-LABEL: define void @BranchWithoutLifetimeEnd() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ID:%.*]] = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr @BranchWithoutLifetimeEnd.resumers)
; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 16)
; CHECK-NEXT: [[VFRAME:%.*]] = call noalias nonnull ptr @llvm.coro.begin(token [[ID]], ptr [[ALLOC]])
; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.resume, ptr [[VFRAME]], align 8
; CHECK-NEXT: [[DESTROY_ADDR:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME:%.*]], ptr [[VFRAME]], i32 0, i32 1
; CHECK-NEXT: store ptr @BranchWithoutLifetimeEnd.destroy, ptr [[DESTROY_ADDR]], align 8
; CHECK-NEXT: [[TESTVAL:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 2
; CHECK-NEXT: call void @consume.i8.array(ptr [[TESTVAL]])
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @testbool, align 1
; CHECK-NEXT: [[INDEX_ADDR1:%.*]] = getelementptr inbounds [[BRANCHWITHOUTLIFETIMEEND_FRAME]], ptr [[VFRAME]], i32 0, i32 3
; CHECK-NEXT: store i1 false, ptr [[INDEX_ADDR1]], align 1
; CHECK-NEXT: ret void
;
entry:
%testval = alloca %i8.array
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
%alloc = call ptr @malloc(i64 16) #3
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)

call void @llvm.lifetime.start.p0(i64 100, ptr %testval)
call void @consume.i8.array(ptr %testval)

%0 = load i8, ptr @testbool, align 1
%tobool = trunc nuw i8 %0 to i1
br i1 %tobool, label %if.then, label %if.end

if.then:
call void @llvm.lifetime.end.p0(i64 100, ptr %testval)
br label %if.end

if.end:
%save = call token @llvm.coro.save(ptr null)
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
switch i8 %suspend, label %exit [
i8 0, label %await.ready
i8 1, label %exit
]
await.ready:
br label %exit
exit:
call i1 @llvm.coro.end(ptr null, i1 false, token none)
ret void
}


declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr)
declare ptr @llvm.coro.begin(token, ptr writeonly) #3
declare ptr @llvm.coro.frame() #5
declare i8 @llvm.coro.suspend(token, i1) #3
declare i1 @llvm.coro.end(ptr, i1, token) #3
declare void @llvm.lifetime.start.p0(i64, ptr nocapture) #4
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) #4
Loading