Skip to content

[Clang] C++20 Coroutines: Introduce Frontend Attribute [[clang::coro_await_elidable]] #99282

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,9 @@ Attribute Changes in Clang
instantiation by accidentally allowing it in C++ in some circumstances.
(#GH106864)

- Introduced a new attribute ``[[clang::coro_await_elidable]]`` on coroutine return types
to express elidability at call sites where the coroutine is co_awaited as a prvalue.

Improvements to Clang's diagnostics
-----------------------------------

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/AST/Expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -2991,6 +2991,9 @@ class CallExpr : public Expr {

bool hasStoredFPFeatures() const { return CallExprBits.HasFPFeatures; }

/// Returns the IsCoroElideSafe bit stored in CallExprBits for this call.
bool isCoroElideSafe() const { return CallExprBits.IsCoroElideSafe; }
/// Sets the IsCoroElideSafe bit in CallExprBits to \p V (true when omitted).
void setCoroElideSafe(bool V = true) { CallExprBits.IsCoroElideSafe = V; }

Decl *getCalleeDecl() { return getCallee()->getReferencedDeclOfCallee(); }
const Decl *getCalleeDecl() const {
return getCallee()->getReferencedDeclOfCallee();
Expand Down
5 changes: 4 additions & 1 deletion clang/include/clang/AST/Stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -561,8 +561,11 @@ class alignas(void *) Stmt {
LLVM_PREFERRED_TYPE(bool)
unsigned HasFPFeatures : 1;

/// True if the call expression is a call to a coroutine that is marked as
/// safe to elide (see CallExpr::isCoroElideSafe()).
unsigned IsCoroElideSafe : 1;

/// Padding used to align OffsetToTrailingObjects to a byte multiple.
unsigned : 24 - 3 - NumExprBits;
unsigned : 24 - 4 - NumExprBits;

/// The offset in bytes from the this pointer to the start of the
/// trailing objects belonging to CallExpr. Intentionally byte sized
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,14 @@ def CoroDisableLifetimeBound : InheritableAttr {
let SimpleHandler = 1;
}

// Attribute with the Clang spelling "coro_await_elidable". It is accepted only
// on C++ record declarations and only when compiling C++; its user-facing
// documentation is the CoroAwaitElidableDoc record.
def CoroAwaitElidable : InheritableAttr {
let Spellings = [Clang<"coro_await_elidable">];
// Restrict the attribute to C++ record (class/struct) declarations.
let Subjects = SubjectList<[CXXRecord]>;
let LangOpts = [CPlusPlus];
let Documentation = [CoroAwaitElidableDoc];
let SimpleHandler = 1;
}

// OSObject-based attributes.
def OSConsumed : InheritableParamAttr {
let Spellings = [Clang<"os_consumed">];
Expand Down
33 changes: 32 additions & 1 deletion clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -8255,6 +8255,38 @@ but do not pass them to the underlying coroutine or pass them by value.
}];
}

def CoroAwaitElidableDoc : Documentation {
let Category = DocCatDecl;
let Content = [{
``[[clang::coro_await_elidable]]`` is a class attribute that can be applied
to a coroutine return type.

When a coroutine function that returns such a type calls another coroutine function,
the compiler performs heap allocation elision if the call to the callee coroutine
is immediately co_awaited as a prvalue. In this case, the coroutine frame for the
callee will be a local variable within the enclosing braces in the caller's stack
frame. Like other local variables in coroutines, this variable may in turn be
collected into the caller's coroutine frame, which may itself be allocated on the heap.

Example:

.. code-block:: c++

class [[clang::coro_await_elidable]] Task { ... };

Task foo();
Task bar() {
co_await foo(); // foo()'s coroutine frame on this line is elidable
auto t = foo(); // foo()'s coroutine frame on this line is NOT elidable
co_await t;
}

The behavior is undefined if the caller coroutine is destroyed earlier than the
callee coroutine.

}];
}

def CountedByDocs : Documentation {
let Category = DocCatField;
let Content = [{
Expand Down Expand Up @@ -8414,4 +8446,3 @@ Declares that a function potentially allocates heap memory, and prevents any pot
of ``nonallocating`` by the compiler.
}];
}

2 changes: 2 additions & 0 deletions clang/lib/AST/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1475,6 +1475,7 @@ CallExpr::CallExpr(StmtClass SC, Expr *Fn, ArrayRef<Expr *> PreArgs,
this->computeDependence();

CallExprBits.HasFPFeatures = FPFeatures.requiresTrailingStorage();
CallExprBits.IsCoroElideSafe = false;
if (hasStoredFPFeatures())
setStoredFPFeatures(FPFeatures);
}
Expand All @@ -1490,6 +1491,7 @@ CallExpr::CallExpr(StmtClass SC, unsigned NumPreArgs, unsigned NumArgs,
assert((CallExprBits.OffsetToTrailingObjects == OffsetToTrailingObjects) &&
"OffsetToTrailingObjects overflow!");
CallExprBits.HasFPFeatures = HasFPFeatures;
CallExprBits.IsCoroElideSafe = false;
}

CallExpr *CallExpr::Create(const ASTContext &Ctx, Expr *Fn,
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/CodeGen/CGBlocks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,7 +1163,8 @@ llvm::Type *CodeGenModule::getGenericBlockLiteralType() {
}

RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke) {
const auto *BPT = E->getCallee()->getType()->castAs<BlockPointerType>();
llvm::Value *BlockPtr = EmitScalarExpr(E->getCallee());
llvm::Type *GenBlockTy = CGM.getGenericBlockLiteralType();
Expand Down Expand Up @@ -1220,7 +1221,7 @@ RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
CGCallee Callee(CGCalleeInfo(), Func);

// And call the block.
return EmitCall(FnInfo, Callee, ReturnValue, Args);
return EmitCall(FnInfo, Callee, ReturnValue, Args, CallOrInvoke);
}

Address CodeGenFunction::GetAddrOfBlockDecl(const VarDecl *variable) {
Expand Down
5 changes: 3 additions & 2 deletions clang/lib/CodeGen/CGCUDARuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ CGCUDARuntime::~CGCUDARuntime() {}

RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue) {
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke) {
llvm::BasicBlock *ConfigOKBlock = CGF.createBasicBlock("kcall.configok");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("kcall.end");

Expand All @@ -35,7 +36,7 @@ RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,

eval.begin(CGF);
CGF.EmitBlock(ConfigOKBlock);
CGF.EmitSimpleCallExpr(E, ReturnValue);
CGF.EmitSimpleCallExpr(E, ReturnValue, CallOrInvoke);
CGF.EmitBranch(ContBlock);

CGF.EmitBlock(ContBlock);
Expand Down
8 changes: 5 additions & 3 deletions clang/lib/CodeGen/CGCUDARuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/IR/GlobalValue.h"

namespace llvm {
class CallBase;
class Function;
class GlobalVariable;
}
Expand Down Expand Up @@ -82,9 +83,10 @@ class CGCUDARuntime {
CGCUDARuntime(CodeGenModule &CGM) : CGM(CGM) {}
virtual ~CGCUDARuntime();

virtual RValue EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue);
virtual RValue
EmitCUDAKernelCallExpr(CodeGenFunction &CGF, const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke = nullptr);

/// Emits a kernel launch stub.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
Expand Down
10 changes: 5 additions & 5 deletions clang/lib/CodeGen/CGCXXABI.h
Original file line number Diff line number Diff line change
Expand Up @@ -485,11 +485,11 @@ class CGCXXABI {
llvm::PointerUnion<const CXXDeleteExpr *, const CXXMemberCallExpr *>;

/// Emit the ABI-specific virtual destructor call.
virtual llvm::Value *EmitVirtualDestructorCall(CodeGenFunction &CGF,
const CXXDestructorDecl *Dtor,
CXXDtorType DtorType,
Address This,
DeleteOrMemberCallExpr E) = 0;
virtual llvm::Value *
EmitVirtualDestructorCall(CodeGenFunction &CGF, const CXXDestructorDecl *Dtor,
CXXDtorType DtorType, Address This,
DeleteOrMemberCallExpr E,
llvm::CallBase **CallOrInvoke) = 0;

virtual void adjustCallArgsForDestructorThunk(CodeGenFunction &CGF,
GlobalDecl GD,
Expand Down
16 changes: 6 additions & 10 deletions clang/lib/CodeGen/CGClass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2192,15 +2192,11 @@ static bool canEmitDelegateCallArgs(CodeGenFunction &CGF,
return true;
}

void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
CXXCtorType Type,
bool ForVirtualBase,
bool Delegating,
Address This,
CallArgList &Args,
AggValueSlot::Overlap_t Overlap,
SourceLocation Loc,
bool NewPointerIsChecked) {
void CodeGenFunction::EmitCXXConstructorCall(
const CXXConstructorDecl *D, CXXCtorType Type, bool ForVirtualBase,
bool Delegating, Address This, CallArgList &Args,
AggValueSlot::Overlap_t Overlap, SourceLocation Loc,
bool NewPointerIsChecked, llvm::CallBase **CallOrInvoke) {
const CXXRecordDecl *ClassDecl = D->getParent();

if (!NewPointerIsChecked)
Expand Down Expand Up @@ -2248,7 +2244,7 @@ void CodeGenFunction::EmitCXXConstructorCall(const CXXConstructorDecl *D,
const CGFunctionInfo &Info = CGM.getTypes().arrangeCXXConstructorCall(
Args, D, Type, ExtraArgs.Prefix, ExtraArgs.Suffix, PassPrototypeArgs);
CGCallee Callee = CGCallee::forDirect(CalleePtr, GlobalDecl(D, Type));
EmitCall(Info, Callee, ReturnValueSlot(), Args, nullptr, false, Loc);
EmitCall(Info, Callee, ReturnValueSlot(), Args, CallOrInvoke, false, Loc);

// Generate vtable assumptions if we're constructing a complete object
// with a vtable. We don't do this for base subobjects for two reasons:
Expand Down
55 changes: 39 additions & 16 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
Expand Down Expand Up @@ -5544,24 +5545,38 @@ RValue CodeGenFunction::EmitRValueForField(LValue LV,
//===--------------------------------------------------------------------===//

RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke) {
llvm::CallBase *CallOrInvokeStorage;
if (!CallOrInvoke) {
CallOrInvoke = &CallOrInvokeStorage;
}

auto AddCoroElideSafeOnExit = llvm::make_scope_exit([&] {
if (E->isCoroElideSafe()) {
auto *I = *CallOrInvoke;
if (I)
I->addFnAttr(llvm::Attribute::CoroElideSafe);
}
});

// Builtins never have block type.
if (E->getCallee()->getType()->isBlockPointerType())
return EmitBlockCallExpr(E, ReturnValue);
return EmitBlockCallExpr(E, ReturnValue, CallOrInvoke);

if (const auto *CE = dyn_cast<CXXMemberCallExpr>(E))
return EmitCXXMemberCallExpr(CE, ReturnValue);
return EmitCXXMemberCallExpr(CE, ReturnValue, CallOrInvoke);

if (const auto *CE = dyn_cast<CUDAKernelCallExpr>(E))
return EmitCUDAKernelCallExpr(CE, ReturnValue);
return EmitCUDAKernelCallExpr(CE, ReturnValue, CallOrInvoke);

// A CXXOperatorCallExpr is created even for explicit object methods, but
// these should be treated like static function call.
if (const auto *CE = dyn_cast<CXXOperatorCallExpr>(E))
if (const auto *MD =
dyn_cast_if_present<CXXMethodDecl>(CE->getCalleeDecl());
MD && MD->isImplicitObjectMemberFunction())
return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue);
return EmitCXXOperatorMemberCallExpr(CE, MD, ReturnValue, CallOrInvoke);

CGCallee callee = EmitCallee(E->getCallee());

Expand All @@ -5574,14 +5589,17 @@ RValue CodeGenFunction::EmitCallExpr(const CallExpr *E,
return EmitCXXPseudoDestructorExpr(callee.getPseudoDestructorExpr());
}

return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue);
return EmitCall(E->getCallee()->getType(), callee, E, ReturnValue,
/*Chain=*/nullptr, CallOrInvoke);
}

/// Emit a CallExpr without considering whether it might be a subclass.
RValue CodeGenFunction::EmitSimpleCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke) {
CGCallee Callee = EmitCallee(E->getCallee());
return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue);
return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue,
/*Chain=*/nullptr, CallOrInvoke);
}

// Detect the unusual situation where an inline version is shadowed by a
Expand Down Expand Up @@ -5785,8 +5803,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
llvm_unreachable("bad evaluation kind");
}

LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E) {
RValue RV = EmitCallExpr(E);
LValue CodeGenFunction::EmitCallExprLValue(const CallExpr *E,
llvm::CallBase **CallOrInvoke) {
RValue RV = EmitCallExpr(E, ReturnValueSlot(), CallOrInvoke);

if (!RV.isScalar())
return MakeAddrLValue(RV.getAggregateAddress(), E->getType(),
Expand Down Expand Up @@ -5909,9 +5928,11 @@ LValue CodeGenFunction::EmitStmtExprLValue(const StmtExpr *E) {
AlignmentSource::Decl);
}

RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee,
const CallExpr *E, ReturnValueSlot ReturnValue,
llvm::Value *Chain) {
RValue CodeGenFunction::EmitCall(QualType CalleeType,
const CGCallee &OrigCallee, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Value *Chain,
llvm::CallBase **CallOrInvoke) {
// Get the actual function type. The callee type will always be a pointer to
// function type or a block pointer type.
assert(CalleeType->isFunctionPointerType() &&
Expand Down Expand Up @@ -6131,8 +6152,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
Address(Handle, Handle->getType(), CGM.getPointerAlign()));
Callee.setFunctionPointer(Stub);
}
llvm::CallBase *CallOrInvoke = nullptr;
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke,
llvm::CallBase *LocalCallOrInvoke = nullptr;
RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &LocalCallOrInvoke,
E == MustTailCall, E->getExprLoc());

// Generate function declaration DISuprogram in order to be used
Expand All @@ -6141,11 +6162,13 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee
if (auto *CalleeDecl = dyn_cast_or_null<FunctionDecl>(TargetDecl)) {
FunctionArgList Args;
QualType ResTy = BuildFunctionArgList(CalleeDecl, Args);
DI->EmitFuncDeclForCallSite(CallOrInvoke,
DI->EmitFuncDeclForCallSite(LocalCallOrInvoke,
DI->getFunctionType(CalleeDecl, ResTy, Args),
CalleeDecl);
}
}
if (CallOrInvoke)
*CallOrInvoke = LocalCallOrInvoke;

return Call;
}
Expand Down
Loading
Loading