-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[clang] Constant-evaluate format strings as last resort #135864
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 8 commits
be63894
75d5026
654b959
fe16bf1
e3fc694
7b57085
00dfaeb
ef807c2
75af364
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1920,9 +1920,17 @@ static bool EvaluateComplex(const Expr *E, ComplexValue &Res, EvalInfo &Info); | |
static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result, | ||
EvalInfo &Info); | ||
static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result); | ||
static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E, | ||
QualType &CharTy, LValue &String); | ||
static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info, | ||
LValue &String, | ||
QualType CharTy, | ||
uint64_t &Offset); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a pretty confusing interface.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Happy to rename There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry about that, I meant if(StringLiteral* SL = GetLValueAsStringLiteral(LValue))
Size = EvaluateStringLiteralSize(SL, Offset); I believe the second method never fails but I might be wrong about that, use optional if it can. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The explanation makes sense now, thank you. From what I can tell, though, |
||
template <typename CharAction> | ||
static bool IterateStringLValue(EvalInfo &Info, const Expr *E, QualType CharTy, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a comment? Maybe call it |
||
LValue &String, CharAction &&Action); | ||
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, | ||
EvalInfo &Info, | ||
std::string *StringResult = nullptr); | ||
EvalInfo &Info); | ||
|
||
/// Evaluate an integer or fixed point expression into an APResult. | ||
static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result, | ||
|
@@ -17950,19 +17958,46 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, | |
return tryEvaluateBuiltinObjectSize(this, Type, Info, Result); | ||
} | ||
|
||
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, | ||
EvalInfo &Info, std::string *StringResult) { | ||
if (!E->getType()->hasPointerRepresentation() || !E->isPRValue()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think a better early exit would be: if (!Ty->hasPointerRepresentation() && !Ty->canDecayToPointerType())
return false; This would eliminate the need for the I think leaving the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mh, I don't love checking the same thing twice. I reorganized things a little differently to avoid the |
||
static bool EvaluateStringAsLValue(EvalInfo &Info, const Expr *E, | ||
QualType &CharTy, LValue &String) { | ||
QualType Ty = E->getType(); | ||
if (!E->isPRValue()) | ||
return false; | ||
|
||
LValue String; | ||
if (Ty->canDecayToPointerType()) { | ||
if (E->isGLValue()) { | ||
if (!EvaluateLValue(E, String, Info)) | ||
return false; | ||
} else { | ||
APValue &Value = Info.CurrentCall->createTemporary( | ||
E, Ty, ScopeKind::FullExpression, String); | ||
if (!EvaluateInPlace(Value, Info, String, E)) | ||
return false; | ||
} | ||
// The result is a pointer to the first element of the array. | ||
auto *AT = Info.Ctx.getAsArrayType(Ty); | ||
CharTy = AT->getElementType(); | ||
if (auto *CAT = dyn_cast<ConstantArrayType>(AT)) | ||
String.addArray(Info, E, CAT); | ||
else | ||
String.addUnsizedArray(Info, E, CharTy); | ||
return true; | ||
} | ||
|
||
if (!EvaluatePointer(E, String, Info)) | ||
return false; | ||
if (Ty->hasPointerRepresentation()) { | ||
if (!EvaluatePointer(E, String, Info)) | ||
return false; | ||
CharTy = Ty->getPointeeType(); | ||
return true; | ||
} | ||
|
||
QualType CharTy = E->getType()->getPointeeType(); | ||
return false; | ||
} | ||
|
||
// Fast path: if it's a string literal, search the string value. | ||
static const StringLiteral *StringLValueIsLiteral(EvalInfo &Info, | ||
LValue &String, | ||
QualType CharTy, | ||
uint64_t &Offset) { | ||
if (const StringLiteral *S = dyn_cast_or_null<StringLiteral>( | ||
String.getLValueBase().dyn_cast<const Expr *>())) { | ||
StringRef Str = S->getBytes(); | ||
|
@@ -17971,46 +18006,117 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, | |
S->getCharByteWidth() == 1 && | ||
// FIXME: Add fast-path for wchar_t too. | ||
Info.Ctx.hasSameUnqualifiedType(CharTy, Info.Ctx.CharTy)) { | ||
Str = Str.substr(Off); | ||
|
||
StringRef::size_type Pos = Str.find(0); | ||
if (Pos != StringRef::npos) | ||
Str = Str.substr(0, Pos); | ||
|
||
Result = Str.size(); | ||
if (StringResult) | ||
*StringResult = Str; | ||
return true; | ||
Offset = static_cast<uint64_t>(Off); | ||
return S; | ||
} | ||
|
||
// Fall through to slow path. | ||
} | ||
return nullptr; | ||
} | ||
|
||
// Slow path: scan the bytes of the string looking for the terminating 0. | ||
for (uint64_t Strlen = 0; /**/; ++Strlen) { | ||
template <typename CharAction> | ||
static bool IterateStringLValue(EvalInfo &Info, const Expr *E, QualType CharTy, | ||
LValue &String, CharAction &&Action) { | ||
while (true) { | ||
APValue Char; | ||
if (!handleLValueToRValueConversion(Info, E, CharTy, String, Char) || | ||
!Char.isInt()) | ||
return false; | ||
if (!Char.getInt()) { | ||
Result = Strlen; | ||
if (!Action(Char.getInt().getExtValue())) | ||
return true; | ||
} else if (StringResult) | ||
StringResult->push_back(Char.getInt().getExtValue()); | ||
if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1)) | ||
return false; | ||
} | ||
} | ||
|
||
std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const { | ||
static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result, | ||
EvalInfo &Info) { | ||
LValue String; | ||
QualType CharTy; | ||
if (!EvaluateStringAsLValue(Info, E, CharTy, String)) | ||
return false; | ||
|
||
// Fast path: if it's a string literal, search the string value. | ||
uint64_t Off; | ||
if (const auto *S = StringLValueIsLiteral(Info, String, CharTy, Off)) { | ||
StringRef Str = S->getBytes().substr(Off); | ||
|
||
StringRef::size_type Pos = Str.find(0); | ||
if (Pos != StringRef::npos) | ||
Str = Str.substr(0, Pos); | ||
|
||
Result = Str.size(); | ||
return true; | ||
} | ||
|
||
// Slow path: scan the bytes of the string looking for the terminating 0. | ||
Result = 0; | ||
return IterateStringLValue(Info, E, CharTy, String, [&](int Char) { | ||
if (Char) { | ||
Result++; | ||
return true; | ||
} else | ||
return false; | ||
}); | ||
} | ||
|
||
Expr::StringEvalResult::StringEvalResult(const StringLiteral *SL, | ||
uint64_t Offset) | ||
: SL(SL), Offset(Offset) {} | ||
|
||
Expr::StringEvalResult::StringEvalResult(std::string Contents) | ||
: Storage(std::move(Contents)), SL(nullptr), Offset(0) {} | ||
|
||
llvm::StringRef Expr::StringEvalResult::getString() const { | ||
return SL ? SL->getBytes().substr(Offset) : Storage; | ||
} | ||
|
||
bool Expr::StringEvalResult::getStringLiteral(const StringLiteral *&SL, | ||
uint64_t &Offset) const { | ||
if (this->SL) { | ||
SL = this->SL; | ||
Offset = this->Offset; | ||
return true; | ||
} | ||
return false; | ||
} | ||
|
||
std::optional<Expr::StringEvalResult> | ||
Expr::tryEvaluateString(ASTContext &Ctx, bool *NullTerminated, | ||
bool InConstantContext) const { | ||
if (NullTerminated) | ||
*NullTerminated = false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we put that in StringEvalResult instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Totally, not sure what was going through my head that it wasn't there to begin with. |
||
|
||
Expr::EvalStatus Status; | ||
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold); | ||
uint64_t Result; | ||
std::string StringResult; | ||
EvalInfo Info(Ctx, Status, | ||
(InConstantContext && | ||
(Ctx.getLangOpts().CPlusPlus || Ctx.getLangOpts().C23)) | ||
? EvalInfo::EM_ConstantExpression | ||
: EvalInfo::EM_ConstantFold); | ||
Info.InConstantContext = InConstantContext; | ||
LValue String; | ||
QualType CharTy; | ||
if (!EvaluateStringAsLValue(Info, this, CharTy, String)) | ||
return {}; | ||
|
||
uint64_t Off; | ||
if (const auto *S = StringLValueIsLiteral(Info, String, CharTy, Off)) { | ||
if (NullTerminated) | ||
*NullTerminated = true; | ||
return StringEvalResult(S, Off); | ||
} | ||
|
||
std::string Result; | ||
bool NTFound = IterateStringLValue(Info, this, CharTy, String, [&](int Char) { | ||
if (Char) { | ||
Result.push_back(Char); | ||
return true; | ||
} else | ||
return false; | ||
}); | ||
|
||
if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult)) | ||
return StringResult; | ||
return {}; | ||
if (NullTerminated) | ||
*NullTerminated = NTFound; | ||
return StringEvalResult(Result); | ||
} | ||
|
||
template <typename T> | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could that just an aggregate, that we could use with structured binding?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a polite
union
ofstd::string
andStringLiteral
:Storage
is empty whenSL
is set, and vice versa.getString()
encapsulates useful logic to decide how to get aStringRef
back, which I think justifies having a separate type instead of an aggregate. I will add a comment on top of it.