Skip to content

[NFC][analyzer] Framework for multipart checkers #130985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 17, 2025
44 changes: 31 additions & 13 deletions clang/include/clang/StaticAnalyzer/Core/BugReporter/BugType.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include <string>
#include <variant>

namespace clang {

Expand All @@ -26,36 +27,53 @@ class BugReporter;

class BugType {
private:
const CheckerNameRef CheckerName;
struct CheckerPartRef {
const CheckerBase *Checker;
CheckerPartIdx Idx;
};
using CheckerNameInfo = std::variant<CheckerNameRef, CheckerPartRef>;

const CheckerNameInfo CheckerName;
const std::string Description;
const std::string Category;
const CheckerBase *Checker;
bool SuppressOnSink;

virtual void anchor();

public:
// Straightforward constructor where the checker name is specified directly.
// TODO: As far as I know all applications of this constructor involve ugly
// hacks that could be avoided by switching to a different constructor.
// When those are all eliminated, this constructor should be removed to
// eliminate the `variant` and simplify this class.
BugType(CheckerNameRef CheckerName, StringRef Desc,
StringRef Cat = categories::LogicError, bool SuppressOnSink = false)
: CheckerName(CheckerName), Description(Desc), Category(Cat),
Checker(nullptr), SuppressOnSink(SuppressOnSink) {}
SuppressOnSink(SuppressOnSink) {}
// Constructor that can be called from the constructor of a checker object.
// At that point the checker name is not yet available, but we can save a
// pointer to the checker and later use that to query the name.
BugType(const CheckerBase *Checker, StringRef Desc,
StringRef Cat = categories::LogicError, bool SuppressOnSink = false)
: CheckerName(), Description(Desc), Category(Cat), Checker(Checker),
SuppressOnSink(SuppressOnSink) {}
: CheckerName(CheckerPartRef{Checker, DefaultPart}), Description(Desc),
Category(Cat), SuppressOnSink(SuppressOnSink) {}
// Constructor that can be called from the constructor of a checker object
// when it has multiple parts with separate names. We save the name and the
// part index to be able to query the name of that part later.
BugType(const CheckerBase *Checker, CheckerPartIdx Idx, StringRef Desc,
StringRef Cat = categories::LogicError, bool SuppressOnSink = false)
: CheckerName(CheckerPartRef{Checker, Idx}), Description(Desc),
Category(Cat), SuppressOnSink(SuppressOnSink) {}
virtual ~BugType() = default;

StringRef getDescription() const { return Description; }
StringRef getCategory() const { return Category; }
StringRef getCheckerName() const {
// FIXME: This is a workaround to ensure that the correct checker name is
// used. The checker names are set after the constructors are run.
// In case the BugType object is initialized in the checker's ctor
// the CheckerName field will be empty. To circumvent this problem we use
// CheckerBase whenever it is possible.
StringRef Ret = Checker ? Checker->getName() : CheckerName;
assert(!Ret.empty() && "Checker name is not set properly.");
return Ret;
if (const auto *CNR = std::get_if<CheckerNameRef>(&CheckerName))
return *CNR;

auto [Checker, Idx] = std::get<CheckerPartRef>(CheckerName);
return Checker->getName(Idx);
}

/// isSuppressOnSink - Returns true if bug reports associated with this bug
Expand Down
54 changes: 49 additions & 5 deletions clang/include/clang/StaticAnalyzer/Core/Checker.h
Original file line number Diff line number Diff line change
Expand Up @@ -485,16 +485,60 @@ class Call {
} // end eval namespace

class CheckerBase : public ProgramPointTag {
CheckerNameRef Name;
/// A single checker class (i.e. a subclass of `CheckerBase`) can implement
/// multiple user-facing checkers that have separate names and can be enabled
/// separately, but are backed by the same singleton checker object.
SmallVector<std::optional<CheckerNameRef>, 1> RegisteredNames;

friend class ::clang::ento::CheckerManager;

public:
StringRef getTagDescription() const override;
CheckerNameRef getName() const;
CheckerNameRef getName(CheckerPartIdx Idx = DefaultPart) const {
assert(Idx < RegisteredNames.size() && "Checker part index is too large!");
std::optional<CheckerNameRef> Name = RegisteredNames[Idx];
assert(Name && "Requested checker part is not registered!");
return *Name;
}

bool isPartEnabled(CheckerPartIdx Idx) const {
return Idx < RegisteredNames.size() && RegisteredNames[Idx].has_value();
}

void registerCheckerPart(CheckerPartIdx Idx, CheckerNameRef Name) {
// Paranoia: notice if e.g. UINT_MAX is passed as a checker part index.
assert(Idx < 256 && "Checker part identifiers should be small integers.");

if (Idx >= RegisteredNames.size())
RegisteredNames.resize(Idx + 1, std::nullopt);

assert(!RegisteredNames[Idx] && "Repeated registration of checker a part!");
RegisteredNames[Idx] = Name;
}

StringRef getTagDescription() const override {
// This method inherited from `ProgramPointTag` has two unrelated roles:
// (1) The analyzer option handling logic uses this method to query the
// name of a checker.
// (2) When the `ExplodedGraph` is dumped in DOT format for debugging,
// this is called to attach a description to the nodes. (This happens
// for all subclasses of `ProgramPointTag`, not just checkers.)
// FIXME: Application (1) should be aware of multiple parts within the same
// checker class instance, so it should directly use `getName` instead of
// this inherited interface which cannot support a `CheckerPartIdx`.
// FIXME: Ideally application (2) should return a string that describes the
// whole checker class, not just one of it parts. However, this is only for
// debugging, so returning the name of one part is probably good enough.
Comment on lines +519 to +530
Copy link
Contributor Author

@NagyDonat NagyDonat Mar 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will clean up this situation soon -- perhaps within this PR, perhaps in a follow-up commit that can be merged shortly after this one.

I already tried to refactor the checker option handling code to use getName instead of this method, but I ran into awkward issues where a unittest introduces mocked checker classes and relies on the fact that this method is virtual (while getName is not virtual and has no valid reason to be virtual).

EDIT: I realized that there is a way to convert that unittest.

Copy link
Contributor Author

@NagyDonat NagyDonat Mar 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have concrete plans for cleaning up the issues around this method, but I would like to implement them as a separate follow-up commit when this PR is merged (or at least stable enough).

for (const auto &OptName : RegisteredNames)
if (OptName)
return *OptName;

return "Unregistered checker";
}

/// See CheckerManager::runCheckersForPrintState.
/// Debug state dump callback, see CheckerManager::runCheckersForPrintState.
/// Default implementation does nothing.
virtual void printState(raw_ostream &Out, ProgramStateRef State,
const char *NL, const char *Sep) const { }
const char *NL, const char *Sep) const;
};

/// Dump checker name to stream.
Expand Down
54 changes: 41 additions & 13 deletions clang/include/clang/StaticAnalyzer/Core/CheckerManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,19 @@ class CheckerNameRef {
operator StringRef() const { return Name; }
};

/// A single checker class (and its singleton instance) can act as the
/// implementation of several (user-facing or modeling) checker parts that
/// have shared state and logic, but have their own names and can be enabled or
/// disabled separately.
/// Each checker class that implement multiple parts introduces its own enum
/// type to assign small numerical indices (0, 1, 2 ...) to their parts. The
/// type alias 'CheckerPartIdx' is conceptually the union of these enum types.
using CheckerPartIdx = unsigned;

/// If a checker doesn't have multiple parts, then its single part is
/// represented by this index.
constexpr inline CheckerPartIdx DefaultPart = 0;

enum class ObjCMessageVisitKind {
Pre,
Post,
Expand Down Expand Up @@ -190,23 +203,38 @@ class CheckerManager {
// Checker registration.
//===--------------------------------------------------------------------===//

/// Used to register checkers.
/// All arguments are automatically passed through to the checker
/// constructor.
/// Construct the singleton instance of a checker, register it for the
/// supported callbacks and record its name with `registerCheckerPart()`.
/// Arguments passed to this function are forwarded to the constructor of the
/// checker.
///
/// If `CHECKER` has multiple parts, then the constructor call and the
/// callback registration only happen within the first `registerChecker()`
/// call; while the subsequent calls only enable additional parts of the
/// existing checker object (while registering their names).
///
/// \returns a pointer to the checker object.
template <typename CHECKER, typename... AT>
CHECKER *registerChecker(AT &&... Args) {
template <typename CHECKER, CheckerPartIdx Idx = DefaultPart, typename... AT>
CHECKER *registerChecker(AT &&...Args) {
// This assert could be removed but then we need to make sure that calls
// registering different parts of the same checker pass the same arguments.
static_assert(
Idx == DefaultPart || !sizeof...(AT),
"Argument forwarding isn't supported with multi-part checkers!");

CheckerTag Tag = getTag<CHECKER>();
std::unique_ptr<CheckerBase> &Ref = CheckerTags[Tag];
assert(!Ref && "Checker already registered, use getChecker!");

std::unique_ptr<CHECKER> Checker =
std::make_unique<CHECKER>(std::forward<AT>(Args)...);
Checker->Name = CurrentCheckerName;
CHECKER::_register(Checker.get(), *this);
Ref = std::move(Checker);
return static_cast<CHECKER *>(Ref.get());
std::unique_ptr<CheckerBase> &Ref = CheckerTags[Tag];
if (!Ref) {
std::unique_ptr<CHECKER> Checker =
std::make_unique<CHECKER>(std::forward<AT>(Args)...);
CHECKER::_register(Checker.get(), *this);
Ref = std::move(Checker);
}

CHECKER *Result = static_cast<CHECKER *>(Ref.get());
Result->registerCheckerPart(Idx, CurrentCheckerName);
return Result;
}

template <typename CHECKER>
Expand Down
53 changes: 19 additions & 34 deletions clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,14 @@ class DivZeroChecker : public Checker<check::PreStmt<BinaryOperator>> {

public:
/// This checker class implements several user facing checkers
enum CheckKind { CK_DivideZero, CK_TaintedDivChecker, CK_NumCheckKinds };
bool ChecksEnabled[CK_NumCheckKinds] = {false};
CheckerNameRef CheckNames[CK_NumCheckKinds];
mutable std::unique_ptr<BugType> BugTypes[CK_NumCheckKinds];
enum : CheckerPartIdx {
DivideZeroChecker,
TaintedDivChecker,
NumCheckerParts
};
BugType BugTypes[NumCheckerParts] = {
{this, DivideZeroChecker, "Division by zero"},
{this, TaintedDivChecker, "Division by zero", categories::TaintedData}};

void checkPreStmt(const BinaryOperator *B, CheckerContext &C) const;
};
Expand All @@ -52,14 +56,11 @@ static const Expr *getDenomExpr(const ExplodedNode *N) {

void DivZeroChecker::reportBug(StringRef Msg, ProgramStateRef StateZero,
CheckerContext &C) const {
if (!ChecksEnabled[CK_DivideZero])
if (!isPartEnabled(DivideZeroChecker))
return;
if (!BugTypes[CK_DivideZero])
BugTypes[CK_DivideZero].reset(
new BugType(CheckNames[CK_DivideZero], "Division by zero"));
if (ExplodedNode *N = C.generateErrorNode(StateZero)) {
auto R = std::make_unique<PathSensitiveBugReport>(*BugTypes[CK_DivideZero],
Msg, N);
auto R = std::make_unique<PathSensitiveBugReport>(
BugTypes[DivideZeroChecker], Msg, N);
bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
C.emitReport(std::move(R));
}
Expand All @@ -68,15 +69,11 @@ void DivZeroChecker::reportBug(StringRef Msg, ProgramStateRef StateZero,
void DivZeroChecker::reportTaintBug(
StringRef Msg, ProgramStateRef StateZero, CheckerContext &C,
llvm::ArrayRef<SymbolRef> TaintedSyms) const {
if (!ChecksEnabled[CK_TaintedDivChecker])
if (!isPartEnabled(TaintedDivChecker))
return;
if (!BugTypes[CK_TaintedDivChecker])
BugTypes[CK_TaintedDivChecker].reset(
new BugType(CheckNames[CK_TaintedDivChecker], "Division by zero",
categories::TaintedData));
if (ExplodedNode *N = C.generateNonFatalErrorNode(StateZero)) {
auto R = std::make_unique<PathSensitiveBugReport>(
*BugTypes[CK_TaintedDivChecker], Msg, N);
BugTypes[TaintedDivChecker], Msg, N);
bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
for (auto Sym : TaintedSyms)
R->markInteresting(Sym);
Expand Down Expand Up @@ -129,28 +126,16 @@ void DivZeroChecker::checkPreStmt(const BinaryOperator *B,
C.addTransition(stateNotZero);
}

void ento::registerDivZeroChecker(CheckerManager &mgr) {
DivZeroChecker *checker = mgr.registerChecker<DivZeroChecker>();
checker->ChecksEnabled[DivZeroChecker::CK_DivideZero] = true;
checker->CheckNames[DivZeroChecker::CK_DivideZero] =
mgr.getCurrentCheckerName();
void ento::registerDivZeroChecker(CheckerManager &Mgr) {
Mgr.registerChecker<DivZeroChecker, DivZeroChecker::DivideZeroChecker>();
}

bool ento::shouldRegisterDivZeroChecker(const CheckerManager &mgr) {
return true;
}
bool ento::shouldRegisterDivZeroChecker(const CheckerManager &) { return true; }

void ento::registerTaintedDivChecker(CheckerManager &mgr) {
DivZeroChecker *checker;
if (!mgr.isRegisteredChecker<DivZeroChecker>())
checker = mgr.registerChecker<DivZeroChecker>();
else
checker = mgr.getChecker<DivZeroChecker>();
checker->ChecksEnabled[DivZeroChecker::CK_TaintedDivChecker] = true;
checker->CheckNames[DivZeroChecker::CK_TaintedDivChecker] =
mgr.getCurrentCheckerName();
void ento::registerTaintedDivChecker(CheckerManager &Mgr) {
Mgr.registerChecker<DivZeroChecker, DivZeroChecker::TaintedDivChecker>();
}

bool ento::shouldRegisterTaintedDivChecker(const CheckerManager &mgr) {
bool ento::shouldRegisterTaintedDivChecker(const CheckerManager &) {
return true;
}
5 changes: 2 additions & 3 deletions clang/lib/StaticAnalyzer/Core/Checker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ using namespace ento;

int ImplicitNullDerefEvent::Tag;

StringRef CheckerBase::getTagDescription() const { return getName(); }

CheckerNameRef CheckerBase::getName() const { return Name; }
void CheckerBase::printState(raw_ostream &Out, ProgramStateRef State,
const char *NL, const char *Sep) const {}

CheckerProgramPointTag::CheckerProgramPointTag(StringRef CheckerName,
StringRef Msg)
Expand Down