Skip to content

Commit bb27d5e

Browse files
authored
[analyzer] Don't assume third iteration in loops (#119388)
This commit ensures that if the loop condition is opaque (the analyzer cannot determine whether it's true or false) and there were at least two iterations, then the analyzer doesn't make the unjustified assumption that it can enter yet another iteration. Note that the presence of a loop suggests that the developer thought that two iterations can happen (otherwise an `if` would've been sufficient), but it does not imply that the developer expected three or four iterations -- and in fact there are many false positives where a loop iterates over a two-element (or three-element) data structure, but the analyzer cannot understand the loop condition and blindly assumes that there may be three or more iterations. (In particular, analyzing the FFMPEG project produces 100+ such false positives.) Moreover, this provides some performance improvements in the sense that the analyzer won't waste time on traversing the execution paths with 3 or 4 iterations in a loop (which are very similar to the paths with 2 iterations) and therefore will be able to traverse more branches elsewhere on the `ExplodedGraph`. This logic is disabled if the user enables the widen-loops analyzer option (which is disabled by default), because the "simulate one final iteration after the invalidation" execution path would be suppressed by the "exit the loop if the loop condition is opaque and there were at least two iterations" logic. If we want to support loop widening, we would need to create a follow-up commit which ensures that it "plays nicely" with this logic.
1 parent a9a3fb5 commit bb27d5e

File tree

8 files changed

+362
-41
lines changed

8 files changed

+362
-41
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,6 +1157,13 @@ New features
11571157
Crash and bug fixes
11581158
^^^^^^^^^^^^^^^^^^^
11591159

1160+
- In loops where the loop condition is opaque (i.e. the analyzer cannot
1161+
determine whether it's true or false), the analyzer will no longer assume
1162+
execution paths that perform more that two iterations. These unjustified
1163+
assumptions caused false positive reports (e.g. 100+ out-of-bounds reports in
1164+
the FFMPEG codebase) in loops where the programmer intended only two or three
1165+
steps but the analyzer wasn't able to understand that the loop is limited.
1166+
11601167
Improvements
11611168
^^^^^^^^^^^^
11621169

clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ class CoreEngine {
126126
ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
127127
const ReturnStmt *RS);
128128

129+
/// Helper function called by `HandleBranch()`. If the currently handled
130+
/// branch corresponds to a loop, this returns the number of already
131+
/// completed iterations in that loop, otherwise the return value is
132+
/// `std::nullopt`. Note that this counts _all_ earlier iterations, including
133+
/// ones that were performed within an earlier iteration of an outer loop.
134+
std::optional<unsigned> getCompletedIterationCount(const CFGBlock *B,
135+
ExplodedNode *Pred) const;
136+
129137
public:
130138
/// Construct a CoreEngine object to analyze the provided CFG.
131139
CoreEngine(ExprEngine &exprengine,

clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -321,14 +321,14 @@ class ExprEngine {
321321
NodeBuilderWithSinks &nodeBuilder,
322322
ExplodedNode *Pred);
323323

324-
/// ProcessBranch - Called by CoreEngine. Used to generate successor
325-
/// nodes by processing the 'effects' of a branch condition.
326-
void processBranch(const Stmt *Condition,
327-
NodeBuilderContext& BuilderCtx,
328-
ExplodedNode *Pred,
329-
ExplodedNodeSet &Dst,
330-
const CFGBlock *DstT,
331-
const CFGBlock *DstF);
324+
/// ProcessBranch - Called by CoreEngine. Used to generate successor nodes by
325+
/// processing the 'effects' of a branch condition. If the branch condition
326+
/// is a loop condition, IterationsCompletedInLoop is the number of completed
327+
/// iterations (otherwise it's std::nullopt).
328+
void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx,
329+
ExplodedNode *Pred, ExplodedNodeSet &Dst,
330+
const CFGBlock *DstT, const CFGBlock *DstF,
331+
std::optional<unsigned> IterationsCompletedInLoop);
332332

333333
/// Called by CoreEngine.
334334
/// Used to generate successor nodes for temporary destructors depending
@@ -588,6 +588,8 @@ class ExprEngine {
588588
void evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src,
589589
const Expr *Ex);
590590

591+
bool didEagerlyAssumeBifurcateAt(ProgramStateRef State, const Expr *Ex) const;
592+
591593
static std::pair<const ProgramPointTag *, const ProgramPointTag *>
592594
getEagerlyAssumeBifurcationTags();
593595

clang/lib/StaticAnalyzer/Core/CoreEngine.cpp

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -444,7 +444,8 @@ void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term,
444444
NodeBuilderContext Ctx(*this, B, Pred);
445445
ExplodedNodeSet Dst;
446446
ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()),
447-
*(B->succ_begin() + 1));
447+
*(B->succ_begin() + 1),
448+
getCompletedIterationCount(B, Pred));
448449
// Enqueue the new frontier onto the worklist.
449450
enqueue(Dst);
450451
}
@@ -591,6 +592,30 @@ ExplodedNode *CoreEngine::generateCallExitBeginNode(ExplodedNode *N,
591592
return isNew ? Node : nullptr;
592593
}
593594

595+
std::optional<unsigned>
596+
CoreEngine::getCompletedIterationCount(const CFGBlock *B,
597+
ExplodedNode *Pred) const {
598+
const LocationContext *LC = Pred->getLocationContext();
599+
BlockCounter Counter = WList->getBlockCounter();
600+
unsigned BlockCount =
601+
Counter.getNumVisited(LC->getStackFrame(), B->getBlockID());
602+
603+
const Stmt *Term = B->getTerminatorStmt();
604+
if (isa<ForStmt, WhileStmt, CXXForRangeStmt>(Term)) {
605+
assert(BlockCount >= 1 &&
606+
"Block count of currently analyzed block must be >= 1");
607+
return BlockCount - 1;
608+
}
609+
if (isa<DoStmt>(Term)) {
610+
// In a do-while loop one iteration happens before the first evaluation of
611+
// the loop condition, so we don't subtract one.
612+
return BlockCount;
613+
}
614+
// ObjCForCollectionStmt is skipped intentionally because the current
615+
// application of the iteration counts is not relevant for it.
616+
return std::nullopt;
617+
}
618+
594619
void CoreEngine::enqueue(ExplodedNodeSet &Set) {
595620
for (const auto I : Set)
596621
WList->enqueue(I);

clang/lib/StaticAnalyzer/Core/ExprEngine.cpp

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2760,12 +2760,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) {
27602760
return State->assume(V);
27612761
}
27622762

2763-
void ExprEngine::processBranch(const Stmt *Condition,
2764-
NodeBuilderContext& BldCtx,
2765-
ExplodedNode *Pred,
2766-
ExplodedNodeSet &Dst,
2767-
const CFGBlock *DstT,
2768-
const CFGBlock *DstF) {
2763+
void ExprEngine::processBranch(
2764+
const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred,
2765+
ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF,
2766+
std::optional<unsigned> IterationsCompletedInLoop) {
27692767
assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
27702768
"CXXBindTemporaryExprs are handled by processBindTemporary.");
27712769
const LocationContext *LCtx = Pred->getLocationContext();
@@ -2808,8 +2806,35 @@ void ExprEngine::processBranch(const Stmt *Condition,
28082806
if (StTrue && StFalse)
28092807
assert(!isa<ObjCForCollectionStmt>(Condition));
28102808

2811-
if (StTrue)
2812-
Builder.generateNode(StTrue, true, PredN);
2809+
if (StTrue) {
2810+
// If we are processing a loop condition where two iterations have
2811+
// already been completed and the false branch is also feasible, then
2812+
// don't assume a third iteration because it is a redundant execution
2813+
// path (unlikely to be different from earlier loop exits) and can cause
2814+
// false positives if e.g. the loop iterates over a two-element structure
2815+
// with an opaque condition.
2816+
//
2817+
// The iteration count "2" is hardcoded because it's the natural limit:
2818+
// * the fact that the programmer wrote a loop (and not just an `if`)
2819+
// implies that they thought that the loop body might be executed twice;
2820+
// * however, there are situations where the programmer knows that there
2821+
// are at most two iterations but writes a loop that appears to be
2822+
// generic, because there is no special syntax for "loop with at most
2823+
// two iterations". (This pattern is common in FFMPEG and appears in
2824+
// many other projects as well.)
2825+
bool CompletedTwoIterations = IterationsCompletedInLoop.value_or(0) >= 2;
2826+
bool FalseAlsoFeasible =
2827+
StFalse ||
2828+
didEagerlyAssumeBifurcateAt(PrevState, dyn_cast<Expr>(Condition));
2829+
bool SkipTrueBranch = CompletedTwoIterations && FalseAlsoFeasible;
2830+
2831+
// FIXME: This "don't assume third iteration" heuristic partially
2832+
// conflicts with the widen-loop analysis option (which is off by
2833+
// default). If we intend to support and stabilize the loop widening,
2834+
// we must ensure that it 'plays nicely' with this logic.
2835+
if (!SkipTrueBranch || AMgr.options.ShouldWidenLoops)
2836+
Builder.generateNode(StTrue, true, PredN);
2837+
}
28132838

28142839
if (StFalse)
28152840
Builder.generateNode(StFalse, false, PredN);
@@ -3731,6 +3756,12 @@ ExprEngine::getEagerlyAssumeBifurcationTags() {
37313756
return std::make_pair(&TrueTag, &FalseTag);
37323757
}
37333758

3759+
/// If the last EagerlyAssume attempt was successful (i.e. the true and false
3760+
/// cases were both feasible), this state trait stores the expression where it
3761+
/// happened; otherwise this holds nullptr.
3762+
REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeExprIfSuccessful,
3763+
const Expr *)
3764+
37343765
void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
37353766
ExplodedNodeSet &Src,
37363767
const Expr *Ex) {
@@ -3746,13 +3777,19 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
37463777
}
37473778

37483779
ProgramStateRef State = Pred->getState();
3780+
State = State->set<LastEagerlyAssumeExprIfSuccessful>(nullptr);
37493781
SVal V = State->getSVal(Ex, Pred->getLocationContext());
37503782
std::optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
37513783
if (SEV && SEV->isExpression()) {
37523784
const auto &[TrueTag, FalseTag] = getEagerlyAssumeBifurcationTags();
37533785

37543786
auto [StateTrue, StateFalse] = State->assume(*SEV);
37553787

3788+
if (StateTrue && StateFalse) {
3789+
StateTrue = StateTrue->set<LastEagerlyAssumeExprIfSuccessful>(Ex);
3790+
StateFalse = StateFalse->set<LastEagerlyAssumeExprIfSuccessful>(Ex);
3791+
}
3792+
37563793
// First assume that the condition is true.
37573794
if (StateTrue) {
37583795
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
@@ -3770,6 +3807,11 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst,
37703807
}
37713808
}
37723809

3810+
bool ExprEngine::didEagerlyAssumeBifurcateAt(ProgramStateRef State,
3811+
const Expr *Ex) const {
3812+
return Ex && State->get<LastEagerlyAssumeExprIfSuccessful>() == Ex;
3813+
}
3814+
37733815
void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
37743816
ExplodedNodeSet &Dst) {
37753817
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);

0 commit comments

Comments
 (0)