Skip to content

Commit 377ec36

Browse files
authored
[Clang] Bypass TAD during overload resolution if a perfect match exists (#136018)
This implements the same overload resolution behavior as GCC, as described in https://wg21.link/p3606 (sections 1-2, not 3). If, during overload resolution, there is a non-template candidate that would always be picked - because each of the arguments is a perfect match (i.e., the source and target types are the same) - we do not perform deduction for any template candidate that might exist. The goal is to be able to merge #122423 without being too disruptive. This change means that the selection of the best viable candidate and template argument deduction become interleaved. To avoid rewriting half of Clang, we store in `OverloadCandidateSet` enough information to be able to deduce template candidates from `OverloadCandidateSet::BestViableFunction`. This means that the lifetime of any object used by template argument deduction must outlive a call to `Add*Template*Candidate`. This two-phase resolution is not performed for some initializations, as there are cases where template candidates are a better match per the standard. It is also bypassed for code completion. The change has a nice impact on compile times: https://llvm-compile-time-tracker.com/compare.php?from=719b029c16eeb1035da522fd641dfcc4cee6be74&to=bf7041045c9408490c395230047c5461de72fc39&stat=instructions%3Au Fixes #62096 Fixes #74581 Reapplies #133426
1 parent dbb79c3 commit 377ec36

12 files changed

+944
-219
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ C++ Language Changes
9696
asm((std::string_view("nop")) ::: (std::string_view("memory")));
9797
}
9898

99+
- Clang now implements the changes to overload resolution proposed by section 1 and 2 of
100+
`P3606 <https://wg21.link/P3606R0>`_. If a non-template candidate exists in an overload set that is
101+
a perfect match (all conversion sequences are identity conversions), template candidates are not instantiated.
102+
Diagnostics that would have resulted from the instantiation of these template candidates are no longer
103+
produced. This aligns Clang closer to the behavior of GCC, and fixes (#GH62096) and (#GH74581).
104+
99105
C++2c Feature Support
100106
^^^^^^^^^^^^^^^^^^^^^
101107

clang/include/clang/Sema/Overload.h

Lines changed: 222 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,26 @@ class Sema;
407407
Third == ICK_Identity;
408408
}
409409

410+
/// A conversion sequence is perfect if it is an identity conversion and
411+
/// the type of the source is the same as the type of the target.
412+
bool isPerfect(const ASTContext &C) const {
413+
if (!isIdentityConversion())
414+
return false;
415+
// If we are not performing a reference binding, we can skip comparing
416+
// the types, which has a noticeable performance impact.
417+
if (!ReferenceBinding) {
418+
// The types might differ if there is an array-to-pointer conversion
419+
// or lvalue-to-rvalue conversion.
420+
assert(First || C.hasSameUnqualifiedType(getFromType(), getToType(2)));
421+
return true;
422+
}
423+
if (!C.hasSameType(getFromType(), getToType(2)))
424+
return false;
425+
if (BindsToRvalue && IsLvalueReference)
426+
return false;
427+
return true;
428+
}
429+
410430
ImplicitConversionRank getRank() const;
411431
NarrowingKind
412432
getNarrowingKind(ASTContext &Context, const Expr *Converted,
@@ -743,6 +763,12 @@ class Sema;
743763
Standard.setAllToTypes(T);
744764
}
745765

766+
/// A conversion sequence is perfect if it is an identity conversion and
767+
/// the type of the source is the same as the type of the target.
768+
bool isPerfect(const ASTContext &C) const {
769+
return isStandard() && Standard.isPerfect(C);
770+
}
771+
746772
// True iff this is a conversion sequence from an initializer list to an
747773
// array or std::initializer.
748774
bool hasInitializerListContainerType() const {
@@ -939,6 +965,10 @@ class Sema;
939965
LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind)
940966
unsigned IsADLCandidate : 1;
941967

968+
/// Whether FinalConversion has been set.
969+
LLVM_PREFERRED_TYPE(bool)
970+
unsigned HasFinalConversion : 1;
971+
942972
/// Whether this is a rewritten candidate, and if so, of what kind?
943973
LLVM_PREFERRED_TYPE(OverloadCandidateRewriteKind)
944974
unsigned RewriteKind : 2;
@@ -979,6 +1009,20 @@ class Sema;
9791009
return false;
9801010
}
9811011

1012+
// An overload is a perfect match if the conversion
1013+
// sequences for each argument are perfect.
1014+
bool isPerfectMatch(const ASTContext &Ctx) const {
1015+
if (!Viable)
1016+
return false;
1017+
for (const auto &C : Conversions) {
1018+
if (!C.isInitialized() || !C.isPerfect(Ctx))
1019+
return false;
1020+
}
1021+
if (HasFinalConversion)
1022+
return FinalConversion.isPerfect(Ctx);
1023+
return true;
1024+
}
1025+
9821026
bool TryToFixBadConversion(unsigned Idx, Sema &S) {
9831027
bool CanFix = Fix.tryToFixConversion(
9841028
Conversions[Idx].Bad.FromExpr,
@@ -1012,8 +1056,67 @@ class Sema;
10121056
: IsSurrogate(false), IgnoreObjectArgument(false),
10131057
TookAddressOfOverload(false), StrictPackMatch(false),
10141058
IsADLCandidate(llvm::to_underlying(CallExpr::NotADL)),
1015-
RewriteKind(CRK_None) {}
1059+
HasFinalConversion(false), RewriteKind(CRK_None) {}
1060+
};
1061+
1062+
struct DeferredTemplateOverloadCandidate {
1063+
1064+
// intrusive linked list support for allocateDeferredCandidate
1065+
DeferredTemplateOverloadCandidate *Next = nullptr;
1066+
1067+
enum Kind { Function, Method, Conversion };
1068+
1069+
LLVM_PREFERRED_TYPE(Kind)
1070+
unsigned Kind : 2;
1071+
LLVM_PREFERRED_TYPE(bool)
1072+
unsigned AllowObjCConversionOnExplicit : 1;
1073+
LLVM_PREFERRED_TYPE(bool)
1074+
unsigned AllowResultConversion : 1;
1075+
LLVM_PREFERRED_TYPE(bool)
1076+
unsigned AllowExplicit : 1;
1077+
LLVM_PREFERRED_TYPE(bool)
1078+
unsigned SuppressUserConversions : 1;
1079+
LLVM_PREFERRED_TYPE(bool)
1080+
unsigned PartialOverloading : 1;
1081+
LLVM_PREFERRED_TYPE(bool)
1082+
unsigned AggregateCandidateDeduction : 1;
1083+
};
1084+
1085+
struct DeferredFunctionTemplateOverloadCandidate
1086+
: public DeferredTemplateOverloadCandidate {
1087+
FunctionTemplateDecl *FunctionTemplate;
1088+
DeclAccessPair FoundDecl;
1089+
ArrayRef<Expr *> Args;
1090+
CallExpr::ADLCallKind IsADLCandidate;
1091+
OverloadCandidateParamOrder PO;
1092+
};
1093+
static_assert(std::is_trivially_destructible_v<
1094+
DeferredFunctionTemplateOverloadCandidate>);
1095+
1096+
struct DeferredMethodTemplateOverloadCandidate
1097+
: public DeferredTemplateOverloadCandidate {
1098+
FunctionTemplateDecl *FunctionTemplate;
1099+
DeclAccessPair FoundDecl;
1100+
ArrayRef<Expr *> Args;
1101+
CXXRecordDecl *ActingContext;
1102+
Expr::Classification ObjectClassification;
1103+
QualType ObjectType;
1104+
OverloadCandidateParamOrder PO;
10161105
};
1106+
static_assert(std::is_trivially_destructible_v<
1107+
DeferredMethodTemplateOverloadCandidate>);
1108+
1109+
struct DeferredConversionTemplateOverloadCandidate
1110+
: public DeferredTemplateOverloadCandidate {
1111+
FunctionTemplateDecl *FunctionTemplate;
1112+
DeclAccessPair FoundDecl;
1113+
CXXRecordDecl *ActingContext;
1114+
Expr *From;
1115+
QualType ToType;
1116+
};
1117+
1118+
static_assert(std::is_trivially_destructible_v<
1119+
DeferredConversionTemplateOverloadCandidate>);
10171120

10181121
/// OverloadCandidateSet - A set of overload candidates, used in C++
10191122
/// overload resolution (C++ 13.3).
@@ -1043,6 +1146,11 @@ class Sema;
10431146
/// C++ [over.match.call.general]
10441147
/// Resolve a call through the address of an overload set.
10451148
CSK_AddressOfOverloadSet,
1149+
1150+
/// When doing overload resolution during code completion,
1151+
/// we want to show all viable candidates, including otherwise
1152+
/// deferred template candidates.
1153+
CSK_CodeCompletion,
10461154
};
10471155

10481156
/// Information about operator rewrites to consider when adding operator
@@ -1117,16 +1225,27 @@ class Sema;
11171225
SmallVector<OverloadCandidate, 16> Candidates;
11181226
llvm::SmallPtrSet<uintptr_t, 16> Functions;
11191227

1120-
// Allocator for ConversionSequenceLists. We store the first few of these
1228+
DeferredTemplateOverloadCandidate *FirstDeferredCandidate = nullptr;
1229+
unsigned DeferredCandidatesCount : 8 * sizeof(unsigned) - 2;
1230+
LLVM_PREFERRED_TYPE(bool)
1231+
unsigned HasDeferredTemplateConstructors : 1;
1232+
LLVM_PREFERRED_TYPE(bool)
1233+
unsigned ResolutionByPerfectCandidateIsDisabled : 1;
1234+
1235+
// Allocator for ConversionSequenceLists and deferred candidate args.
1236+
// We store the first few of these
11211237
// inline to avoid allocation for small sets.
11221238
llvm::BumpPtrAllocator SlabAllocator;
11231239

11241240
SourceLocation Loc;
11251241
CandidateSetKind Kind;
11261242
OperatorRewriteInfo RewriteInfo;
11271243

1244+
/// Small storage size for ImplicitConversionSequences
1245+
/// and the persisted arguments of deferred candidates.
11281246
constexpr static unsigned NumInlineBytes =
1129-
24 * sizeof(ImplicitConversionSequence);
1247+
32 * sizeof(ImplicitConversionSequence);
1248+
11301249
unsigned NumInlineBytesUsed = 0;
11311250
alignas(void *) char InlineSpace[NumInlineBytes];
11321251

@@ -1137,15 +1256,13 @@ class Sema;
11371256
/// from the slab allocator.
11381257
/// FIXME: It would probably be nice to have a SmallBumpPtrAllocator
11391258
/// instead.
1140-
/// FIXME: Now that this only allocates ImplicitConversionSequences, do we
1141-
/// want to un-generalize this?
11421259
template <typename T>
11431260
T *slabAllocate(unsigned N) {
11441261
// It's simpler if this doesn't need to consider alignment.
11451262
static_assert(alignof(T) == alignof(void *),
11461263
"Only works for pointer-aligned types.");
1147-
static_assert(std::is_trivial<T>::value ||
1148-
std::is_same<ImplicitConversionSequence, T>::value,
1264+
static_assert(std::is_trivially_destructible_v<T> ||
1265+
(std::is_same_v<ImplicitConversionSequence, T>),
11491266
"Add destruction logic to OverloadCandidateSet::clear().");
11501267

11511268
unsigned NBytes = sizeof(T) * N;
@@ -1159,12 +1276,34 @@ class Sema;
11591276
return reinterpret_cast<T *>(FreeSpaceStart);
11601277
}
11611278

1279+
// Because the size of OverloadCandidateSet has a noticeable impact on
1280+
// performance, we store each deferred template candidate in the slab
1281+
// allocator such that deferred candidates are ultimately a singly-linked
1282+
// intrusive linked list. This ends up being much more efficient than a
1283+
// SmallVector that is empty in the common case.
1284+
template <typename T> T *allocateDeferredCandidate() {
1285+
T *C = slabAllocate<T>(1);
1286+
if (!FirstDeferredCandidate)
1287+
FirstDeferredCandidate = C;
1288+
else {
1289+
auto *F = FirstDeferredCandidate;
1290+
while (F->Next)
1291+
F = F->Next;
1292+
F->Next = C;
1293+
}
1294+
DeferredCandidatesCount++;
1295+
return C;
1296+
}
1297+
11621298
void destroyCandidates();
11631299

11641300
public:
11651301
OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK,
11661302
OperatorRewriteInfo RewriteInfo = {})
1167-
: Loc(Loc), Kind(CSK), RewriteInfo(RewriteInfo) {}
1303+
: FirstDeferredCandidate(nullptr), DeferredCandidatesCount(0),
1304+
HasDeferredTemplateConstructors(false),
1305+
ResolutionByPerfectCandidateIsDisabled(false), Loc(Loc), Kind(CSK),
1306+
RewriteInfo(RewriteInfo) {}
11681307
OverloadCandidateSet(const OverloadCandidateSet &) = delete;
11691308
OverloadCandidateSet &operator=(const OverloadCandidateSet &) = delete;
11701309
~OverloadCandidateSet() { destroyCandidates(); }
@@ -1176,6 +1315,9 @@ class Sema;
11761315
/// Whether diagnostics should be deferred.
11771316
bool shouldDeferDiags(Sema &S, ArrayRef<Expr *> Args, SourceLocation OpLoc);
11781317

1318+
// Whether the resolution of template candidates should be deferred
1319+
bool shouldDeferTemplateArgumentDeduction(const LangOptions &Opts) const;
1320+
11791321
/// Determine when this overload candidate will be new to the
11801322
/// overload set.
11811323
bool isNewCandidate(Decl *F, OverloadCandidateParamOrder PO =
@@ -1199,8 +1341,10 @@ class Sema;
11991341
iterator begin() { return Candidates.begin(); }
12001342
iterator end() { return Candidates.end(); }
12011343

1202-
size_t size() const { return Candidates.size(); }
1203-
bool empty() const { return Candidates.empty(); }
1344+
size_t size() const { return Candidates.size() + DeferredCandidatesCount; }
1345+
bool empty() const {
1346+
return Candidates.empty() && DeferredCandidatesCount == 0;
1347+
}
12041348

12051349
/// Allocate storage for conversion sequences for NumConversions
12061350
/// conversions.
@@ -1216,6 +1360,24 @@ class Sema;
12161360
return ConversionSequenceList(Conversions, NumConversions);
12171361
}
12181362

1363+
/// Provide storage for any Expr* arg that must be preserved
1364+
/// until deferred template candidates are deduced.
1365+
/// Typically this should be used for reversed operator arguments
1366+
/// and any time the argument array is transformed while adding
1367+
/// a template candidate.
1368+
llvm::MutableArrayRef<Expr *> getPersistentArgsArray(unsigned N) {
1369+
Expr **Exprs = slabAllocate<Expr *>(N);
1370+
return llvm::MutableArrayRef<Expr *>(Exprs, N);
1371+
}
1372+
1373+
template <typename... T>
1374+
llvm::MutableArrayRef<Expr *> getPersistentArgsArray(T *...Exprs) {
1375+
llvm::MutableArrayRef<Expr *> Arr =
1376+
getPersistentArgsArray(sizeof...(Exprs));
1377+
llvm::copy(std::initializer_list<Expr *>{Exprs...}, Arr.data());
1378+
return Arr;
1379+
}
1380+
12191381
/// Add a new candidate with NumConversions conversion sequence slots
12201382
/// to the overload set.
12211383
OverloadCandidate &addCandidate(unsigned NumConversions = 0,
@@ -1231,6 +1393,32 @@ class Sema;
12311393
return C;
12321394
}
12331395

1396+
void AddDeferredTemplateCandidate(
1397+
FunctionTemplateDecl *FunctionTemplate, DeclAccessPair FoundDecl,
1398+
ArrayRef<Expr *> Args, bool SuppressUserConversions,
1399+
bool PartialOverloading, bool AllowExplicit,
1400+
CallExpr::ADLCallKind IsADLCandidate, OverloadCandidateParamOrder PO,
1401+
bool AggregateCandidateDeduction);
1402+
1403+
void AddDeferredMethodTemplateCandidate(
1404+
FunctionTemplateDecl *MethodTmpl, DeclAccessPair FoundDecl,
1405+
CXXRecordDecl *ActingContext, QualType ObjectType,
1406+
Expr::Classification ObjectClassification, ArrayRef<Expr *> Args,
1407+
bool SuppressUserConversions, bool PartialOverloading,
1408+
OverloadCandidateParamOrder PO);
1409+
1410+
void AddDeferredConversionTemplateCandidate(
1411+
FunctionTemplateDecl *FunctionTemplate, DeclAccessPair FoundDecl,
1412+
CXXRecordDecl *ActingContext, Expr *From, QualType ToType,
1413+
bool AllowObjCConversionOnExplicit, bool AllowExplicit,
1414+
bool AllowResultConversion);
1415+
1416+
void InjectNonDeducedTemplateCandidates(Sema &S);
1417+
1418+
void DisableResolutionByPerfectCandidate() {
1419+
ResolutionByPerfectCandidateIsDisabled = true;
1420+
}
1421+
12341422
/// Find the best viable function on this overload set, if it exists.
12351423
OverloadingResult BestViableFunction(Sema &S, SourceLocation Loc,
12361424
OverloadCandidateSet::iterator& Best);
@@ -1263,6 +1451,15 @@ class Sema;
12631451
DestAS = AS;
12641452
}
12651453

1454+
private:
1455+
OverloadingResult ResultForBestCandidate(const iterator &Best);
1456+
void CudaExcludeWrongSideCandidates(
1457+
Sema &S, SmallVectorImpl<OverloadCandidate *> &Candidates);
1458+
OverloadingResult
1459+
BestViableFunctionImpl(Sema &S, SourceLocation Loc,
1460+
OverloadCandidateSet::iterator &Best);
1461+
void PerfectViableFunction(Sema &S, SourceLocation Loc,
1462+
OverloadCandidateSet::iterator &Best);
12661463
};
12671464

12681465
bool isBetterOverloadCandidate(Sema &S, const OverloadCandidate &Cand1,
@@ -1311,6 +1508,21 @@ class Sema;
13111508
// parameter.
13121509
bool shouldEnforceArgLimit(bool PartialOverloading, FunctionDecl *Function);
13131510

1511+
inline bool OverloadCandidateSet::shouldDeferTemplateArgumentDeduction(
1512+
const LangOptions &Opts) const {
1513+
return
1514+
// For user-defined conversions we need to check against different
1515+
// combinations of CV qualifiers and look at any explicit specifier, so
1516+
// always deduce template candidates.
1517+
Kind != CSK_InitByUserDefinedConversion
1518+
// When doing code completion, we want to see all the
1519+
// viable candidates.
1520+
&& Kind != CSK_CodeCompletion
1521+
// CUDA may prefer template candidates even when a non-template candidate
1522+
// is a perfect match.
1523+
&& !Opts.CUDA;
1524+
}
1525+
13141526
} // namespace clang
13151527

13161528
#endif // LLVM_CLANG_SEMA_OVERLOAD_H

clang/lib/Sema/SemaCodeComplete.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6354,7 +6354,8 @@ SemaCodeCompletion::ProduceCallSignatureHelp(Expr *Fn, ArrayRef<Expr *> Args,
63546354
Expr *NakedFn = Fn->IgnoreParenCasts();
63556355
// Build an overload candidate set based on the functions we find.
63566356
SourceLocation Loc = Fn->getExprLoc();
6357-
OverloadCandidateSet CandidateSet(Loc, OverloadCandidateSet::CSK_Normal);
6357+
OverloadCandidateSet CandidateSet(Loc,
6358+
OverloadCandidateSet::CSK_CodeCompletion);
63586359

63596360
if (auto ULE = dyn_cast<UnresolvedLookupExpr>(NakedFn)) {
63606361
SemaRef.AddOverloadedCallCandidates(ULE, ArgsWithoutDependentTypes,
@@ -6557,7 +6558,8 @@ QualType SemaCodeCompletion::ProduceConstructorSignatureHelp(
65576558
// FIXME: Provide support for variadic template constructors.
65586559

65596560
if (CRD) {
6560-
OverloadCandidateSet CandidateSet(Loc, OverloadCandidateSet::CSK_Normal);
6561+
OverloadCandidateSet CandidateSet(Loc,
6562+
OverloadCandidateSet::CSK_CodeCompletion);
65616563
for (NamedDecl *C : SemaRef.LookupConstructors(CRD)) {
65626564
if (auto *FD = dyn_cast<FunctionDecl>(C)) {
65636565
// FIXME: we can't yet provide correct signature help for initializer

0 commit comments

Comments
 (0)