Skip to content

Commit e9a5b7a

Browse files
committed
Reduce false positives in warnings about printfs
- Predefined name macros such as `__PRETTY_FUNCTION__` are considered safe. - We need to distinguish between `%s` and `%p` as both accept pointers but the latter is not a buffer operation. This leaves us no choice other than parsing the format string. Fortunately, the building blocks of format parsing have already existed and are quite handy.
1 parent cce5781 commit e9a5b7a

File tree

4 files changed

+93
-14
lines changed

4 files changed

+93
-14
lines changed

clang/include/clang/AST/FormatString.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -783,6 +783,18 @@ bool ParsePrintfString(FormatStringHandler &H,
783783
bool ParseFormatStringHasSArg(const char *beg, const char *end,
784784
const LangOptions &LO, const TargetInfo &Target);
785785

786+
/// Parse C format string and return index (relative to `ArgIndex`) of the first
787+
/// found `s` specifier. Return 0 if not found.
788+
/// \param I The start of the C format string; Updated to the first unparsed
789+
/// position upon return.
790+
/// \param E The end of the C format string;
791+
/// \param ArgIndex The argument index of the last found `s` specifier; Or the
792+
/// argument index of the formatter in initial case.
793+
unsigned ParseFormatStringFirstSArgIndex(const char *&I, const char *E,
794+
unsigned ArgIndex,
795+
const LangOptions &LO,
796+
const TargetInfo &Target);
797+
786798
bool ParseScanfString(FormatStringHandler &H,
787799
const char *beg, const char *end, const LangOptions &LO,
788800
const TargetInfo &Target);

clang/lib/AST/PrintfFormatString.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,34 @@ bool clang::analyze_format_string::ParseFormatStringHasSArg(const char *I,
483483
return false;
484484
}
485485

486+
unsigned clang::analyze_format_string::ParseFormatStringFirstSArgIndex(
487+
const char *&I, const char *E, unsigned ArgIndex, const LangOptions &LO,
488+
const TargetInfo &Target) {
489+
unsigned argIndex = ArgIndex;
490+
491+
// Keep looking for a %s format specifier until we have exhausted the string.
492+
FormatStringHandler H;
493+
while (I != E) {
494+
const PrintfSpecifierResult &FSR =
495+
ParsePrintfSpecifier(H, I, E, argIndex, LO, Target, false, false);
496+
// Did a fail-stop error of any kind occur when parsing the specifier?
497+
// If so, don't do any more processing.
498+
if (FSR.shouldStop())
499+
return false;
500+
// Did we exhaust the string or encounter an error that
501+
// we can recover from?
502+
if (!FSR.hasValue())
503+
continue;
504+
const analyze_printf::PrintfSpecifier &FS = FSR.getValue();
505+
// Return true if this a %s format specifier.
506+
if (FS.getConversionSpecifier().getKind() ==
507+
ConversionSpecifier::Kind::sArg) {
508+
return FS.getPositionalArgIndex();
509+
}
510+
}
511+
return false;
512+
}
513+
486514
bool clang::analyze_format_string::parseFormatStringHasFormattingSpecifiers(
487515
const char *Begin, const char *End, const LangOptions &LO,
488516
const TargetInfo &Target) {

clang/lib/Analysis/UnsafeBufferUsage.cpp

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "clang/AST/DeclCXX.h"
1313
#include "clang/AST/Expr.h"
1414
#include "clang/AST/ExprCXX.h"
15+
#include "clang/AST/FormatString.h"
1516
#include "clang/AST/RecursiveASTVisitor.h"
1617
#include "clang/AST/Stmt.h"
1718
#include "clang/AST/StmtVisitor.h"
@@ -611,6 +612,38 @@ static bool isNullTermPointer(const Expr *Ptr) {
611612
return false;
612613
}
613614

615+
// Return true iff at least one of following cases holds:
616+
// 1. Format string is a literal and there is an unsafe pointer argument
617+
// corresponding to an `s` specifier;
618+
// 2. Format string is not a literal and there is least an unsafe pointer
619+
// argument (including the formatter argument).
620+
static bool hasUnsafeFormatOrSArg(const Expr *Fmt, unsigned FmtArgIdx,
621+
const CallExpr *Call, ASTContext &Ctx) {
622+
if (auto *SL = dyn_cast<StringLiteral>(Fmt->IgnoreParenImpCasts())) {
623+
StringRef FmtStr = SL->getString();
624+
auto I = FmtStr.begin();
625+
auto E = FmtStr.end();
626+
unsigned ArgIdx = FmtArgIdx;
627+
628+
do {
629+
ArgIdx = analyze_format_string::ParseFormatStringFirstSArgIndex(
630+
I, E, ArgIdx, Ctx.getLangOpts(), Ctx.getTargetInfo());
631+
if (ArgIdx && Call->getNumArgs() > ArgIdx &&
632+
!isNullTermPointer(Call->getArg(ArgIdx)))
633+
return true;
634+
} while (ArgIdx);
635+
return false;
636+
}
637+
// If format is not a string literal, we cannot analyze the format string.
638+
// In this case, this call is considered unsafe if at least one argument
639+
// (including the format argument) is unsafe pointer.
640+
return llvm::any_of(
641+
llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
642+
[](const Expr *Arg) {
643+
return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
644+
});
645+
}
646+
614647
// Matches a call to one of the `-printf" functions (excluding the ones with
615648
// va_list, or `-sprintf`s) that taking pointer-to-char-as-string arguments but
616649
// fail to guarantee their null-termination. In other words, these calls are
@@ -626,19 +659,18 @@ AST_MATCHER_P(CallExpr, unsafeStringInPrintfs, StringRef, CoreName) {
626659
if (Prefix.ends_with("w"))
627660
Prefix = Prefix.drop_back(1);
628661

629-
auto AnyUnsafeStrPtr = [](const Expr *Arg) -> bool {
630-
return Arg->getType()->isPointerType() && !isNullTermPointer(Arg);
631-
};
632-
633662
if (Prefix.empty() ||
634663
Prefix == "k") // printf: all pointer args should be null-terminated
635-
return any_of(Node.arguments(), AnyUnsafeStrPtr);
636-
if (Prefix == "f" && Node.getNumArgs() > 1)
637-
return any_of(llvm::make_range(Node.arg_begin() + 1, Node.arg_end()),
638-
AnyUnsafeStrPtr);
639-
if (Prefix == "sn" && Node.getNumArgs() > 2) {
640-
return any_of(llvm::make_range(Node.arg_begin() + 2, Node.arg_end()),
641-
AnyUnsafeStrPtr);
664+
return hasUnsafeFormatOrSArg(Node.getArg(0), 0, &Node,
665+
Finder->getASTContext());
666+
if (Prefix == "f")
667+
return hasUnsafeFormatOrSArg(Node.getArg(1), 1, &Node,
668+
Finder->getASTContext());
669+
if (Prefix == "sn") {
670+
// The first two arguments need to be in safe patterns, which is checked
671+
// by `isSafeSizedby`:
672+
return hasUnsafeFormatOrSArg(Node.getArg(2), 2, &Node,
673+
Finder->getASTContext());
642674
}
643675
return false; // A call to a "-printf" falls into another category.
644676
}
@@ -775,7 +807,7 @@ AST_MATCHER_P(CallExpr, ignoreLibcPrefixAndSuffix,
775807

776808
StringRef CoreName =
777809
TheLittleParser.matchName(II->getName(), FD->getBuiltinID());
778-
810+
779811
return InnerMatcher(CoreName).matches(Node, Finder, Builder);
780812
}
781813
} // namespace clang::ast_matchers

clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,20 @@ void f(char * p, char * q, std::span<char> s, std::span<char> s2) {
7070
sscanf(p, "%s%d", "hello", *p); // expected-warning{{function sscanf introduces unsafe buffer access}}
7171
sscanf_s(p, "%s%d", "hello", *p); // expected-warning{{function sscanf_s introduces unsafe buffer access}}
7272
fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, p); // expected-warning{{function fprintf introduces unsafe buffer access}} expected-note{{use 'std::string::c_str' or string literal as string pointer to guarantee null-termination}}
73+
fprintf((FILE*)p, "%P%d%p%i hello world %32s", *p, *p, p, *p, "hello"); // no warn
7374
printf("%s%d", "hello", *p); // no warn
7475
snprintf(s.data(), s.size_bytes(), "%s%d", "hello", *p); // no warn
76+
snprintf(s.data(), s.size_bytes(), "%s%d", __PRETTY_FUNCTION__, *p); // no warn
7577
strlen("hello");// no warn
7678
}
7779

78-
void v(std::string s1) {
79-
snprintf(s1.data(), s1.size_bytes(), "%s%d", s1.c_str(), 0); // no warn
80+
void v(std::string s1, int *p) {
81+
snprintf(s1.data(), s1.size_bytes(), "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
82+
snprintf(s1.data(), s1.size_bytes(), s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
83+
printf("%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
84+
printf(s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
85+
fprintf((FILE*)0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", p, s1.c_str()); // no warn
86+
fprintf((FILE*)0, s1.c_str(), __PRETTY_FUNCTION__, *p, "hello", s1.c_str()); // no warn
8087
}
8188

8289

0 commit comments

Comments
 (0)