Skip to content

Commit bef7b00

Browse files
committed
C++: Use the new dataflow library in the 'missing scanf' query.
1 parent 8c46bfd commit bef7b00

File tree

3 files changed

+92
-132
lines changed

3 files changed

+92
-132
lines changed

cpp/ql/src/Critical/MissingCheckScanf.ql

Lines changed: 81 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -16,168 +16,131 @@
1616
import cpp
1717
import semmle.code.cpp.commons.Scanf
1818
import semmle.code.cpp.controlflow.Guards
19-
import semmle.code.cpp.ir.dataflow.DataFlow
19+
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
2020
import semmle.code.cpp.ir.IR
2121
import semmle.code.cpp.ir.ValueNumbering
2222

23-
/**
24-
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
25-
*
26-
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
27-
* of `call`, and `vn` is the value number of `instr.`
28-
*/
29-
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
30-
output = call.getOutputArgument(index).getFullyConverted() and
31-
instr.getConvertedResultExpression() = output and
32-
vn.getAnInstruction() = instr
33-
}
34-
35-
/**
36-
* Holds if `instr` is control-flow reachable in 0 or more steps from
37-
* a call to a `scanf`-like function.
38-
*/
23+
/** Holds if `n` reaches an argument to a call to a `scanf`-like function. */
3924
pragma[nomagic]
40-
predicate fwdFlow0(Instruction instr) {
41-
isSource(_, _, instr, _, _)
25+
predicate revFlow0(Node n) {
26+
isSink(_, _, n, _)
4227
or
43-
exists(Instruction prev |
44-
fwdFlow0(prev) and
45-
prev.getASuccessor() = instr
46-
)
28+
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
4729
}
4830

4931
/**
50-
* Holds if `instr` is part of the IR translation of `access` that
51-
* is not an expression being deallocated, and `instr` has value
52-
* number `vn`.
32+
* Holds if `n` represents an uninitialized stack-allocated variable, or a
33+
* newly (and presumed uninitialized) heap allocation.
5334
*/
54-
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
55-
instr.getAst() = access and
56-
not any(DeallocationExpr dealloc).getFreedExpr() = access and
57-
vn.getAnInstruction() = instr
35+
predicate isUninitialized(Node n) {
36+
exists(n.asUninitialized()) or
37+
n.asIndirectExpr(1) instanceof AllocationExpr
5838
}
5939

60-
/**
61-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
62-
* to a use of the written variable.
63-
*/
6440
pragma[nomagic]
65-
predicate revFlow0(Instruction instr) {
66-
fwdFlow0(instr) and
41+
predicate fwdFlow0(Node n) {
42+
revFlow0(n) and
6743
(
68-
isSink(instr, _, _)
44+
isUninitialized(n)
6945
or
70-
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
46+
exists(Node prev |
47+
fwdFlow0(prev) and
48+
localFlowStep(prev, n)
49+
)
7150
)
7251
}
7352

53+
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
54+
input = call.getOutputArgument(index) and
55+
n.asIndirectExpr() = input
56+
}
57+
7458
/**
75-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
76-
* that writes to a variable with value number `vn`, without passing through
77-
* redefinitions of the variable.
59+
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
60+
* argument that has not been previously initialized.
7861
*/
79-
pragma[nomagic]
80-
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
81-
revFlow0(instr) and
82-
(
83-
isSource(_, _, instr, vn, _)
84-
or
85-
exists(Instruction prev |
86-
fwdFlow(prev, vn) and
87-
prev.getASuccessor() = instr and
88-
not isBarrier(instr, vn)
89-
)
90-
)
62+
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
63+
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
64+
}
65+
66+
/**
67+
* Holds if `call` is a `scanf`-like function that may write to `output` at
68+
* index `index` and `n` is the dataflow node that represents the data after
69+
* it has been written to by `call`.
70+
*/
71+
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
72+
isRelevantScanfCall(call, index, output) and
73+
output = call.getOutputArgument(index) and
74+
n.asDefiningArgument() = output
9175
}
9276

9377
/**
94-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
95-
* that writes to a variable with value number `vn`, without passing through
96-
* redefinitions of the variable.
97-
*
98-
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
99-
* control-flow reachable from an argument to a `scanf`-like function call.
78+
* Holds if `n` is reachable from an output argument of a relevant call to
79+
* a `scanf`-like function.
10080
*/
10181
pragma[nomagic]
102-
predicate revFlow(Instruction instr, ValueNumber vn) {
103-
fwdFlow(instr, pragma[only_bind_out](vn)) and
104-
(
105-
isSink(instr, _, vn)
106-
or
107-
exists(Instruction succ | revFlow(succ, vn) |
108-
instr.getASuccessor() = succ and
109-
not isBarrier(succ, vn)
110-
)
82+
predicate fwdFlow(Node n) {
83+
isSource(_, _, n, _)
84+
or
85+
exists(Node prev |
86+
fwdFlow(prev) and
87+
localFlowStep(prev, n) and
88+
not isSink(prev, _)
11189
)
11290
}
11391

11492
/**
115-
* A type that bundles together a reachable instruction with the appropriate
116-
* value number (i.e., the value number that's transferred from the source
117-
* to the sink).
93+
* Holds if `n` is a node such that `n.asExpr() = e` and `e` is not an
94+
* argument of a deallocation expression.
11895
*/
119-
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
120-
121-
class Node extends MkNode {
122-
ValueNumber vn;
123-
Instruction instr;
124-
125-
Node() { this = MkNode(instr, vn) }
126-
127-
final string toString() { result = instr.toString() }
128-
129-
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
130-
131-
final Location getLocation() { result = instr.getLocation() }
96+
predicate isSink(Node n, Expr e) {
97+
n.asExpr() = e and
98+
not any(DeallocationExpr dealloc).getFreedExpr() = e
13299
}
133100

134101
/**
135-
* Holds if `instr` is an instruction with value number `vn` that is
136-
* used in a store operation, or is overwritten by another call to
137-
* a `scanf`-like function.
102+
* Holds if `n` is part of a path from a call to a `scanf`-like function
103+
* to a use of the written variable.
138104
*/
139-
private predicate isBarrier(Instruction instr, ValueNumber vn) {
140-
// We only need to compute barriers for instructions that we
141-
// managed to hit during the initial flow stage.
142-
revFlow0(pragma[only_bind_into](instr)) and
143-
valueNumber(instr) = vn and
144-
exists(Expr e | instr.getAst() = e |
145-
instr = any(StoreInstruction s).getDestinationAddress()
105+
pragma[nomagic]
106+
predicate revFlow(Node n) {
107+
fwdFlow(n) and
108+
(
109+
isSink(n, _)
146110
or
147-
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
111+
exists(Node succ |
112+
revFlow(succ) and
113+
localFlowStep(n, succ) and
114+
not isSink(n, _)
115+
)
148116
)
149117
}
150118

151-
/** Holds if `n1` steps to `n2` in a single step. */
152-
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
153-
154-
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
119+
/** A local flow step, restricted to relevant dataflow nodes. */
120+
private predicate step(Node n1, Node n2) {
121+
revFlow(n1) and
122+
revFlow(n2) and
123+
localFlowStep(n1, n2)
124+
}
155125

156-
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
126+
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
157127

158128
/**
159129
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
160-
* an instruction that is part of the translation of `access` which is a transitive
161-
* control-flow successor of `call`.
162-
*
163-
* Furthermore, `source` and `sink` have identical global value numbers.
130+
* a dataflow node that represents the expression `e`.
164131
*/
165-
predicate hasFlow(
166-
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
167-
) {
168-
exists(ValueNumber vn |
169-
isSource(call, index, source, vn, _) and
170-
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
171-
isSink(sink, access, vn)
172-
)
132+
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
133+
isSource(call, index, source, _) and
134+
hasFlow(source, sink) and
135+
isSink(sink, e)
173136
}
174137

175138
/**
176139
* Gets the smallest possible `scanf` return value of `call` that would indicate
177140
* success in writing the output argument at index `index`.
178141
*/
179142
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
180-
isSource(call, index, _, _, _) and
143+
isSource(call, index, _, _) and
181144
result =
182145
index + 1 -
183146
count(ScanfFormatLiteral f, int n |
@@ -191,7 +154,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
191154
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
192155
* at least `minGuard`.
193156
*/
194-
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
157+
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
195158
exists(int index |
196159
hasFlow(_, call, index, _, e) and
197160
minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +174,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
211174
exists(GuardCondition g, Expr left, Expr right |
212175
right = g.getAChild() and
213176
value = left.getValue().toInt() and
214-
DataFlow::localExprFlow(call, right)
177+
localExprFlow(call, right)
215178
|
216179
g.ensuresEq(left, right, 0, result, true) and op = "=="
217180
or
@@ -221,9 +184,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
221184
)
222185
}
223186

224-
from ScanfFunctionCall call, Access access, int minGuard
225-
where hasNonGuardedAccess(call, access, minGuard)
226-
select access,
187+
from ScanfFunctionCall call, Expr e, int minGuard
188+
where hasNonGuardedAccess(call, e, minGuard)
189+
select e,
227190
"This variable is read, but may not have been written. " +
228191
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
229192
call.toString()
Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,21 @@
11
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
2-
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
32
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
43
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
5-
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
6-
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
4+
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
5+
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
76
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
87
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
98
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
109
| test.cpp:173:8:173:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:171:7:171:11 | call to scanf | call to scanf |
1110
| test.cpp:205:8:205:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:204:7:204:11 | call to scanf | call to scanf |
1211
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
1312
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
13+
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
14+
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
1415
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
15-
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
1616
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
1717
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
1818
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
19-
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
20-
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
2119
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
2220
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
2321
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
24-
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ int main()
4848
int i = 0;
4949

5050
scanf("%d", &i);
51-
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
51+
use(i); // GOOD. Design choice: already initialized variables are fine.
5252
}
5353

5454
{
@@ -261,23 +261,23 @@ int main()
261261
i = 0;
262262

263263
scanf("%d", &i);
264-
use(i); // BAD
264+
use(i); // GOOD
265265
}
266266

267267
{
268268
int i;
269269

270270
set_by_ref(i);
271271
scanf("%d", &i);
272-
use(i); // BAD
272+
use(i); // GOOD [FALSE POSITIVE]
273273
}
274274

275275
{
276276
int i;
277277

278278
set_by_ptr(&i);
279279
scanf("%d", &i);
280-
use(i); // BAD
280+
use(i); // GOOD [FALSE POSITIVE]
281281
}
282282

283283
{
@@ -299,15 +299,15 @@ int main()
299299
int *ptr_i = &i;
300300

301301
scanf("%d", &i);
302-
use(*ptr_i); // BAD: may not have written `i`
302+
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
303303
}
304304

305305
{
306306
int i;
307307
int *ptr_i = &i;
308308

309309
scanf("%d", ptr_i);
310-
use(i); // BAD: may not have written `*ptr_i`
310+
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
311311
}
312312

313313
{
@@ -427,5 +427,5 @@ void scan_and_write() {
427427
void scan_and_static_variable() {
428428
static int i;
429429
scanf("%d", &i);
430-
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
430+
use(i); // GOOD: static variables are always 0-initialized
431431
}

0 commit comments

Comments
 (0)