Skip to content

Commit b2f0994

Browse files
authored
Merge pull request #15599 from aschackmull/dataflow/fieldflowbranchlimit-v2
Dataflow: update fieldFlowBranchLimit semantics
2 parents 19974f0 + 5950149 commit b2f0994

File tree

19 files changed

+302
-46
lines changed

19 files changed

+302
-46
lines changed

cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,8 @@ predicate knownSourceModel(Node source, string model) { none() }
290290

291291
predicate knownSinkModel(Node sink, string model) { none() }
292292

293+
class DataFlowSecondLevelScope = Unit;
294+
293295
/**
294296
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
295297
* side-effect, resulting in a summary from `p` to itself.

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ module CppDataFlow implements InputSig<Location> {
2222

2323
predicate getAdditionalFlowIntoCallNodeTerm = Private::getAdditionalFlowIntoCallNodeTerm/2;
2424

25+
predicate getSecondLevelScope = Private::getSecondLevelScope/1;
26+
2527
predicate validParameterAliasStep = Private::validParameterAliasStep/2;
2628

2729
predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1583,3 +1583,74 @@ predicate validParameterAliasStep(Node node1, Node node2) {
15831583
)
15841584
)
15851585
}
1586+
1587+
private predicate isTopLevel(Cpp::Stmt s) { any(Function f).getBlock().getAStmt() = s }
1588+
1589+
private Cpp::Stmt getAChainedBranch(Cpp::IfStmt s) {
1590+
result = s.getThen()
1591+
or
1592+
exists(Cpp::Stmt elseBranch | s.getElse() = elseBranch |
1593+
result = getAChainedBranch(elseBranch)
1594+
or
1595+
result = elseBranch and not elseBranch instanceof Cpp::IfStmt
1596+
)
1597+
}
1598+
1599+
private Instruction getInstruction(Node n) {
1600+
result = n.asInstruction() or
1601+
result = n.asOperand().getUse() or
1602+
result = n.(SsaPhiNode).getPhiNode().getBasicBlock().getFirstInstruction() or
1603+
n.(IndirectInstruction).hasInstructionAndIndirectionIndex(result, _) or
1604+
result = getInstruction(n.(PostUpdateNode).getPreUpdateNode())
1605+
}
1606+
1607+
private newtype TDataFlowSecondLevelScope =
1608+
TTopLevelIfBranch(Cpp::Stmt s) {
1609+
exists(Cpp::IfStmt ifstmt | s = getAChainedBranch(ifstmt) and isTopLevel(ifstmt))
1610+
} or
1611+
TTopLevelSwitchCase(Cpp::SwitchCase s) {
1612+
exists(Cpp::SwitchStmt switchstmt | s = switchstmt.getASwitchCase() and isTopLevel(switchstmt))
1613+
}
1614+
1615+
/**
1616+
* A second-level control-flow scope in a `switch` or a chained `if` statement.
1617+
*
1618+
* This is a `switch` case or a branch of a chained `if` statement, given that
1619+
* the `switch` or `if` statement is top level, that is, it is not nested inside
1620+
* other CFG constructs.
1621+
*/
1622+
class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope {
1623+
/** Gets a textual representation of this element. */
1624+
string toString() {
1625+
exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s.toString())
1626+
or
1627+
exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.toString())
1628+
}
1629+
1630+
/** Gets the primary location of this element. */
1631+
Cpp::Location getLocation() {
1632+
exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s.getLocation())
1633+
or
1634+
exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.getLocation())
1635+
}
1636+
1637+
/**
1638+
* Gets a statement directly contained in this scope. For an `if` branch, this
1639+
* is the branch itself, and for a `switch case`, this is one the statements
1640+
* of that case branch.
1641+
*/
1642+
private Cpp::Stmt getAStmt() {
1643+
exists(Cpp::Stmt s | this = TTopLevelIfBranch(s) | result = s)
1644+
or
1645+
exists(Cpp::SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.getAStmt())
1646+
}
1647+
1648+
/** Gets a data-flow node nested within this scope. */
1649+
Node getANode() {
1650+
getInstruction(result).getAst().(Cpp::ControlFlowNode).getEnclosingStmt().getParentStmt*() =
1651+
this.getAStmt()
1652+
}
1653+
}
1654+
1655+
/** Gets the second-level scope containing the node `n`, if any. */
1656+
DataFlowSecondLevelScope getSecondLevelScope(Node n) { result.getANode() = n }

cpp/ql/test/library-tests/dataflow/taint-tests/swap2.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ void test_copy_assignment_operator()
8484

8585
swap(z1, z2);
8686

87-
sink(z2.data1); // $ ir MISSING: ast
87+
sink(z2.data1); // $ ir ast
8888
sink(z1.data1); // $ SPURIOUS: ir ast=81:27 ast=82:16
8989
}
9090

csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2882,6 +2882,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo
28822882

28832883
predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }
28842884

2885+
class DataFlowSecondLevelScope = Unit;
2886+
28852887
/**
28862888
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
28872889
* side-effect, resulting in a summary from `p` to itself.

csharp/ql/test/library-tests/dataflow/collections/CollectionFlow.ql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ module ArrayFlowConfig implements DataFlow::ConfigSig {
1414
mc.getAnArgument() = sink.asExpr()
1515
)
1616
}
17-
18-
int fieldFlowBranchLimit() { result = 100 }
1917
}
2018

2119
module ArrayFlow = DataFlow::Global<ArrayFlowConfig>;

csharp/ql/test/library-tests/dataflow/types/Types.ql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ module TypesConfig implements DataFlow::ConfigSig {
1818
mc.getAnArgument() = sink.asExpr()
1919
)
2020
}
21-
22-
int fieldFlowBranchLimit() { result = 1000 }
2321
}
2422

2523
import ValueFlowTest<TypesConfig>

go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,8 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo
415415

416416
predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }
417417

418+
class DataFlowSecondLevelScope = Unit;
419+
418420
/**
419421
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
420422
* side-effect, resulting in a summary from `p` to itself.

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowImplSpecific.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ module JavaDataFlow implements InputSig<Location> {
2020

2121
Node exprNode(DataFlowExpr e) { result = Public::exprNode(e) }
2222

23+
predicate getSecondLevelScope = Private::getSecondLevelScope/1;
24+
2325
predicate mayBenefitFromCallContext = Private::mayBenefitFromCallContext/1;
2426

2527
predicate viableImplInCallContext = Private::viableImplInCallContext/2;

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,81 @@ predicate knownSourceModel(Node source, string model) { sourceNode(source, _, mo
581581

582582
predicate knownSinkModel(Node sink, string model) { sinkNode(sink, _, model) }
583583

584+
private predicate isTopLevel(Stmt s) {
585+
any(Callable c).getBody() = s
586+
or
587+
exists(BlockStmt b | s = b.getAStmt() and isTopLevel(b))
588+
}
589+
590+
private Stmt getAChainedBranch(IfStmt s) {
591+
result = s.getThen()
592+
or
593+
exists(Stmt elseBranch | s.getElse() = elseBranch |
594+
result = getAChainedBranch(elseBranch)
595+
or
596+
result = elseBranch and not elseBranch instanceof IfStmt
597+
)
598+
}
599+
600+
private newtype TDataFlowSecondLevelScope =
601+
TTopLevelIfBranch(Stmt s) {
602+
exists(IfStmt ifstmt | s = getAChainedBranch(ifstmt) and isTopLevel(ifstmt))
603+
} or
604+
TTopLevelSwitchCase(SwitchCase s) {
605+
exists(SwitchStmt switchstmt | s = switchstmt.getACase() and isTopLevel(switchstmt))
606+
}
607+
608+
private SwitchCase getPrecedingCase(Stmt s) {
609+
result = s
610+
or
611+
exists(SwitchStmt switch, int i |
612+
s = switch.getStmt(i) and
613+
not s instanceof SwitchCase and
614+
result = getPrecedingCase(switch.getStmt(i - 1))
615+
)
616+
}
617+
618+
/**
619+
* A second-level control-flow scope in a `switch` or a chained `if` statement.
620+
*
621+
* This is a `switch` case or a branch of a chained `if` statement, given that
622+
* the `switch` or `if` statement is top level, that is, it is not nested inside
623+
* other CFG constructs.
624+
*/
625+
class DataFlowSecondLevelScope extends TDataFlowSecondLevelScope {
626+
/** Gets a textual representation of this element. */
627+
string toString() {
628+
exists(Stmt s | this = TTopLevelIfBranch(s) | result = s.toString())
629+
or
630+
exists(SwitchCase s | this = TTopLevelSwitchCase(s) | result = s.toString())
631+
}
632+
633+
/**
634+
* Gets a statement directly contained in this scope. For an `if` branch, this
635+
* is the branch itself, and for a `switch case`, this is one the statements
636+
* of that case branch.
637+
*/
638+
private Stmt getAStmt() {
639+
exists(Stmt s | this = TTopLevelIfBranch(s) | result = s)
640+
or
641+
exists(SwitchCase s | this = TTopLevelSwitchCase(s) |
642+
result = s.getRuleStatement() or
643+
s = getPrecedingCase(result)
644+
)
645+
}
646+
647+
/** Gets a data-flow node nested within this scope. */
648+
Node getANode() { getRelatedExpr(result).getAnEnclosingStmt() = this.getAStmt() }
649+
}
650+
651+
private Expr getRelatedExpr(Node n) {
652+
n.asExpr() = result or
653+
n.(PostUpdateNode).getPreUpdateNode().asExpr() = result
654+
}
655+
656+
/** Gets the second-level scope containing the node `n`, if any. */
657+
DataFlowSecondLevelScope getSecondLevelScope(Node n) { result.getANode() = n }
658+
584659
/**
585660
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
586661
* side-effect, resulting in a summary from `p` to itself.

java/ql/test/library-tests/frameworks/guava/handwritten/flow.ql

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ module ValueFlowConfig implements DataFlow::ConfigSig {
1818
predicate isSink(DataFlow::Node n) {
1919
exists(MethodCall ma | ma.getMethod().hasName("sink") | n.asExpr() = ma.getAnArgument())
2020
}
21-
22-
int fieldFlowBranchLimit() { result = 100 }
2321
}
2422

2523
module ValueFlow = DataFlow::Global<ValueFlowConfig>;

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,8 @@ predicate knownSinkModel(Node sink, string model) {
10871087
sink = ModelOutput::getASinkNode(_, model).asSink()
10881088
}
10891089

1090+
class DataFlowSecondLevelScope = Unit;
1091+
10901092
/**
10911093
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
10921094
* side-effect, resulting in a summary from `p` to itself.

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2254,6 +2254,8 @@ predicate knownSinkModel(Node sink, string model) {
22542254
sink = ModelOutput::getASinkNode(_, model).asSink()
22552255
}
22562256

2257+
class DataFlowSecondLevelScope = Unit;
2258+
22572259
/**
22582260
* Holds if flow is allowed to pass from parameter `p` and back to itself as a
22592261
* side-effect, resulting in a summary from `p` to itself.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: majorAnalysis
3+
---
4+
* The data flow library performs heuristic filtering of code paths that have a high degree of control-flow uncertainty for improved performance in cases that are deemed unlikely to yield true positive flow paths. This filtering can be controlled with the `fieldFlowBranchLimit` predicate in configurations. Two bugs have been fixed in relation to this: Some cases of high uncertainty were not being correctly identified. This fix improves performance in certain scenarios. Another group of cases of low uncertainty were also being misidentified, which led to false negatives. Taken together, we generally expect some additional query results with more true positives and fewer false positives.

shared/dataflow/codeql/dataflow/DataFlow.qll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,24 @@ signature module InputSig<LocationSig Location> {
308308
*/
309309
default int getAdditionalFlowIntoCallNodeTerm(ArgumentNode arg, ParameterNode p) { none() }
310310

311+
/**
312+
* A second-level control-flow scope in a callable.
313+
*
314+
* This is used to provide a more fine-grained separation of a callable
315+
* context for the purpose of identifying uncertain control flow. For most
316+
* languages, this is not needed, as this separation is handled through
317+
* virtual dispatch, but for some cases (for example, C++) this can be used to
318+
* identify, for example, large top-level switch statements acting like
319+
* virtual dispatch.
320+
*/
321+
class DataFlowSecondLevelScope {
322+
/** Gets a textual representation of this element. */
323+
string toString();
324+
}
325+
326+
/** Gets the second-level scope containing the node `n`, if any. */
327+
default DataFlowSecondLevelScope getSecondLevelScope(Node n) { none() }
328+
311329
bindingset[call, p, arg]
312330
default predicate golangSpecificParamArgFilter(
313331
DataFlowCall call, ParameterNode p, ArgumentNode arg

0 commit comments

Comments
 (0)