Skip to content

Commit 3d8d3f6

Browse files
committed
C++: Use the new dataflow library in the 'missing scanf' query.
1 parent 8c46bfd commit 3d8d3f6

File tree

3 files changed

+93
-125
lines changed

3 files changed

+93
-125
lines changed

cpp/ql/src/Critical/MissingCheckScanf.ql

Lines changed: 82 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -16,144 +16,119 @@
1616
import cpp
1717
import semmle.code.cpp.commons.Scanf
1818
import semmle.code.cpp.controlflow.Guards
19-
import semmle.code.cpp.ir.dataflow.DataFlow
19+
import semmle.code.cpp.dataflow.new.DataFlow::DataFlow
2020
import semmle.code.cpp.ir.IR
2121
import semmle.code.cpp.ir.ValueNumbering
2222

2323
/**
24-
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
25-
*
26-
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
27-
* of `call`, and `vn` is the value number of `instr.`
28-
*/
29-
predicate isSource(ScanfFunctionCall call, int index, Instruction instr, ValueNumber vn, Expr output) {
30-
output = call.getOutputArgument(index).getFullyConverted() and
31-
instr.getConvertedResultExpression() = output and
32-
vn.getAnInstruction() = instr
33-
}
34-
35-
/**
36-
* Holds if `instr` is control-flow reachable in 0 or more steps from
37-
* a call to a `scanf`-like function.
24+
* Holds if `instr` is part of a path from a call to a `scanf`-like function
25+
* to a use of the written variable.
3826
*/
3927
pragma[nomagic]
40-
predicate fwdFlow0(Instruction instr) {
41-
isSource(_, _, instr, _, _)
28+
predicate revFlow0(Node n) {
29+
isSink(_, _, n, _)
4230
or
43-
exists(Instruction prev |
44-
fwdFlow0(prev) and
45-
prev.getASuccessor() = instr
46-
)
31+
exists(Node succ | revFlow0(succ) | localFlowStep(n, succ))
4732
}
4833

4934
/**
50-
* Holds if `instr` is part of the IR translation of `access` that
51-
* is not an expression being deallocated, and `instr` has value
52-
* number `vn`.
35+
* Holds if `n` represents an uninitialized stack-allocated variable, or a
36+
* newly (and presumed uninitialized) heap allocation.
5337
*/
54-
predicate isSink(Instruction instr, Access access, ValueNumber vn) {
55-
instr.getAst() = access and
56-
not any(DeallocationExpr dealloc).getFreedExpr() = access and
57-
vn.getAnInstruction() = instr
38+
predicate isUninitialized(Node n) {
39+
exists(n.asUninitialized()) or
40+
n.asIndirectExpr(1) instanceof AllocationExpr
5841
}
5942

60-
/**
61-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
62-
* to a use of the written variable.
63-
*/
6443
pragma[nomagic]
65-
predicate revFlow0(Instruction instr) {
66-
fwdFlow0(instr) and
44+
predicate fwdFlow0(Node n) {
45+
revFlow0(n) and
6746
(
68-
isSink(instr, _, _)
47+
isUninitialized(n)
6948
or
70-
exists(Instruction succ | revFlow0(succ) | instr.getASuccessor() = succ)
49+
exists(Node prev |
50+
fwdFlow0(prev) and
51+
localFlowStep(prev, n)
52+
)
7153
)
7254
}
7355

56+
predicate isSink(ScanfFunctionCall call, int index, Node n, Expr input) {
57+
input = call.getOutputArgument(index) and
58+
n.asIndirectExpr() = input
59+
}
60+
7461
/**
75-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
76-
* that writes to a variable with value number `vn`, without passing through
77-
* redefinitions of the variable.
62+
* Holds if `call` is a `scanf`-like call and `output` is the `index`'th
63+
* argument that has not been previously initialized.
7864
*/
79-
pragma[nomagic]
80-
private predicate fwdFlow(Instruction instr, ValueNumber vn) {
81-
revFlow0(instr) and
82-
(
83-
isSource(_, _, instr, vn, _)
84-
or
85-
exists(Instruction prev |
86-
fwdFlow(prev, vn) and
87-
prev.getASuccessor() = instr and
88-
not isBarrier(instr, vn)
89-
)
90-
)
65+
predicate isRelevantScanfCall(ScanfFunctionCall call, int index, Expr output) {
66+
exists(Node n | fwdFlow0(n) and isSink(call, index, n, output))
9167
}
9268

9369
/**
94-
* Holds if `instr` is part of a path from a call to a `scanf`-like function
95-
* that writes to a variable with value number `vn`, without passing through
96-
* redefinitions of the variable.
70+
* Holds if `call` is a `scanf`-like function that may write to `output` at index `index`.
9771
*
98-
* Note: This predicate only holds for the `(intr, vn)` pairs that are also
99-
* control-flow reachable from an argument to a `scanf`-like function call.
72+
* Furthermore, `instr` is the instruction that defines the address of the `index`'th argument
73+
* of `call`, and `vn` is the value number of `instr.`
74+
*/
75+
predicate isSource(ScanfFunctionCall call, int index, Node n, Expr output) {
76+
isRelevantScanfCall(call, index, output) and
77+
output = call.getOutputArgument(index) and
78+
n.asDefiningArgument() = output
79+
}
80+
81+
/**
82+
* Holds if `instr` is control-flow reachable in 0 or more steps from
83+
* a call to a `scanf`-like function.
10084
*/
10185
pragma[nomagic]
102-
predicate revFlow(Instruction instr, ValueNumber vn) {
103-
fwdFlow(instr, pragma[only_bind_out](vn)) and
104-
(
105-
isSink(instr, _, vn)
106-
or
107-
exists(Instruction succ | revFlow(succ, vn) |
108-
instr.getASuccessor() = succ and
109-
not isBarrier(succ, vn)
110-
)
86+
predicate fwdFlow(Node n) {
87+
isSource(_, _, n, _)
88+
or
89+
exists(Node prev |
90+
fwdFlow(prev) and
91+
localFlowStep(prev, n) and
92+
not isSink(prev, _)
11193
)
11294
}
11395

11496
/**
115-
* A type that bundles together a reachable instruction with the appropriate
116-
* value number (i.e., the value number that's transferred from the source
117-
* to the sink).
97+
* Holds if `instr` is part of the IR translation of `access` that
98+
* is not an expression being deallocated, and `instr` has value
99+
* number `vn`.
118100
*/
119-
newtype TNode = MkNode(Instruction instr, ValueNumber vn) { revFlow(instr, vn) }
120-
121-
class Node extends MkNode {
122-
ValueNumber vn;
123-
Instruction instr;
124-
125-
Node() { this = MkNode(instr, vn) }
126-
127-
final string toString() { result = instr.toString() }
128-
129-
final Node getASuccessor() { result = MkNode(pragma[only_bind_out](instr.getASuccessor()), vn) }
130-
131-
final Location getLocation() { result = instr.getLocation() }
101+
predicate isSink(Node n, Expr e) {
102+
n.asExpr() = e and
103+
not any(DeallocationExpr dealloc).getFreedExpr() = e
132104
}
133105

134106
/**
135-
* Holds if `instr` is an instruction with value number `vn` that is
136-
* used in a store operation, or is overwritten by another call to
137-
* a `scanf`-like function.
107+
* Holds if `instr` is part of a path from a call to a `scanf`-like function
108+
* to a use of the written variable.
138109
*/
139-
private predicate isBarrier(Instruction instr, ValueNumber vn) {
140-
// We only need to compute barriers for instructions that we
141-
// managed to hit during the initial flow stage.
142-
revFlow0(pragma[only_bind_into](instr)) and
143-
valueNumber(instr) = vn and
144-
exists(Expr e | instr.getAst() = e |
145-
instr = any(StoreInstruction s).getDestinationAddress()
110+
pragma[nomagic]
111+
predicate revFlow(Node n) {
112+
fwdFlow(n) and
113+
(
114+
isSink(n, _)
146115
or
147-
isSource(_, _, _, _, [e, e.getParent().(AddressOfExpr)])
116+
exists(Node succ |
117+
revFlow(succ) and
118+
localFlowStep(n, succ) and
119+
not isSink(n, _)
120+
)
148121
)
149122
}
150123

151-
/** Holds if `n1` steps to `n2` in a single step. */
152-
predicate isSuccessor(Node n1, Node n2) { n1.getASuccessor() = n2 }
153-
154-
predicate hasFlow(Node n1, Node n2) = fastTC(isSuccessor/2)(n1, n2)
124+
/** A local flow step, restricted to relevant dataflow nodes. */
125+
private predicate step(Node n1, Node n2) {
126+
revFlow(n1) and
127+
revFlow(n2) and
128+
localFlowStep(n1, n2)
129+
}
155130

156-
Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
131+
predicate hasFlow(Node n1, Node n2) = fastTC(step/2)(n1, n2)
157132

158133
/**
159134
* Holds if `source` is the `index`'th argument to the `scanf`-like call `call`, and `sink` is
@@ -162,22 +137,18 @@ Node getNode(Instruction instr, ValueNumber vn) { result = MkNode(instr, vn) }
162137
*
163138
* Furthermore, `source` and `sink` have identical global value numbers.
164139
*/
165-
predicate hasFlow(
166-
Instruction source, ScanfFunctionCall call, int index, Instruction sink, Access access
167-
) {
168-
exists(ValueNumber vn |
169-
isSource(call, index, source, vn, _) and
170-
hasFlow(getNode(source, pragma[only_bind_into](vn)), getNode(sink, pragma[only_bind_into](vn))) and
171-
isSink(sink, access, vn)
172-
)
140+
predicate hasFlow(Node source, ScanfFunctionCall call, int index, Node sink, Expr e) {
141+
isSource(call, index, source, _) and
142+
hasFlow(source, sink) and
143+
isSink(sink, e)
173144
}
174145

175146
/**
176147
* Gets the smallest possible `scanf` return value of `call` that would indicate
177148
* success in writing the output argument at index `index`.
178149
*/
179150
int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
180-
isSource(call, index, _, _, _) and
151+
isSource(call, index, _, _) and
181152
result =
182153
index + 1 -
183154
count(ScanfFormatLiteral f, int n |
@@ -191,7 +162,7 @@ int getMinimumGuardConstant(ScanfFunctionCall call, int index) {
191162
* Holds the access to `e` isn't guarded by a check that ensures that `call` returned
192163
* at least `minGuard`.
193164
*/
194-
predicate hasNonGuardedAccess(ScanfFunctionCall call, Access e, int minGuard) {
165+
predicate hasNonGuardedAccess(ScanfFunctionCall call, Expr e, int minGuard) {
195166
exists(int index |
196167
hasFlow(_, call, index, _, e) and
197168
minGuard = getMinimumGuardConstant(call, index)
@@ -211,7 +182,7 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
211182
exists(GuardCondition g, Expr left, Expr right |
212183
right = g.getAChild() and
213184
value = left.getValue().toInt() and
214-
DataFlow::localExprFlow(call, right)
185+
localExprFlow(call, right)
215186
|
216187
g.ensuresEq(left, right, 0, result, true) and op = "=="
217188
or
@@ -221,9 +192,9 @@ BasicBlock blockGuardedBy(int value, string op, ScanfFunctionCall call) {
221192
)
222193
}
223194

224-
from ScanfFunctionCall call, Access access, int minGuard
225-
where hasNonGuardedAccess(call, access, minGuard)
226-
select access,
195+
from ScanfFunctionCall call, Expr e, int minGuard
196+
where hasNonGuardedAccess(call, e, minGuard)
197+
select e,
227198
"This variable is read, but may not have been written. " +
228199
"It should be guarded by a check that the $@ returns at least " + minGuard + ".", call,
229200
call.toString()
Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,21 @@
11
| test.cpp:35:7:35:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:34:3:34:7 | call to scanf | call to scanf |
2-
| test.cpp:51:7:51:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:50:3:50:7 | call to scanf | call to scanf |
32
| test.cpp:68:7:68:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:67:3:67:7 | call to scanf | call to scanf |
43
| test.cpp:80:7:80:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:79:3:79:7 | call to scanf | call to scanf |
5-
| test.cpp:90:8:90:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
6-
| test.cpp:98:8:98:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
4+
| test.cpp:90:7:90:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:89:3:89:7 | call to scanf | call to scanf |
5+
| test.cpp:98:7:98:8 | * ... | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:97:3:97:7 | call to scanf | call to scanf |
76
| test.cpp:108:7:108:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:107:3:107:8 | call to fscanf | call to fscanf |
87
| test.cpp:115:7:115:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:114:3:114:8 | call to sscanf | call to sscanf |
98
| test.cpp:164:8:164:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:162:7:162:11 | call to scanf | call to scanf |
109
| test.cpp:173:8:173:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:171:7:171:11 | call to scanf | call to scanf |
1110
| test.cpp:205:8:205:8 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:204:7:204:11 | call to scanf | call to scanf |
1211
| test.cpp:224:8:224:8 | j | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:221:7:221:11 | call to scanf | call to scanf |
1312
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
13+
| test.cpp:248:9:248:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
14+
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:246:25:246:29 | call to scanf | call to scanf |
1415
| test.cpp:252:9:252:9 | d | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 2. | test.cpp:250:14:250:18 | call to scanf | call to scanf |
15-
| test.cpp:264:7:264:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:263:3:263:7 | call to scanf | call to scanf |
1616
| test.cpp:272:7:272:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:271:3:271:7 | call to scanf | call to scanf |
1717
| test.cpp:280:7:280:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:279:3:279:7 | call to scanf | call to scanf |
1818
| test.cpp:292:7:292:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:291:3:291:7 | call to scanf | call to scanf |
19-
| test.cpp:302:8:302:12 | ptr_i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:301:3:301:7 | call to scanf | call to scanf |
20-
| test.cpp:310:7:310:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:309:3:309:7 | call to scanf | call to scanf |
2119
| test.cpp:404:25:404:25 | u | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:403:6:403:11 | call to sscanf | call to sscanf |
2220
| test.cpp:416:7:416:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:413:7:413:11 | call to scanf | call to scanf |
2321
| test.cpp:423:7:423:7 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:420:7:420:11 | call to scanf | call to scanf |
24-
| test.cpp:430:6:430:6 | i | This variable is read, but may not have been written. It should be guarded by a check that the $@ returns at least 1. | test.cpp:429:2:429:6 | call to scanf | call to scanf |

cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ int main()
4848
int i = 0;
4949

5050
scanf("%d", &i);
51-
use(i); // BAD. Design choice: already initialized variables shouldn't make a difference.
51+
use(i); // GOOD. Design choice: already initialized variables are fine.
5252
}
5353

5454
{
@@ -261,23 +261,23 @@ int main()
261261
i = 0;
262262

263263
scanf("%d", &i);
264-
use(i); // BAD
264+
use(i); // GOOD
265265
}
266266

267267
{
268268
int i;
269269

270270
set_by_ref(i);
271271
scanf("%d", &i);
272-
use(i); // BAD
272+
use(i); // GOOD [FALSE POSITIVE]
273273
}
274274

275275
{
276276
int i;
277277

278278
set_by_ptr(&i);
279279
scanf("%d", &i);
280-
use(i); // BAD
280+
use(i); // GOOD [FALSE POSITIVE]
281281
}
282282

283283
{
@@ -299,15 +299,15 @@ int main()
299299
int *ptr_i = &i;
300300

301301
scanf("%d", &i);
302-
use(*ptr_i); // BAD: may not have written `i`
302+
use(*ptr_i); // BAD [NOT DETECTED]: may not have written `i`
303303
}
304304

305305
{
306306
int i;
307307
int *ptr_i = &i;
308308

309309
scanf("%d", ptr_i);
310-
use(i); // BAD: may not have written `*ptr_i`
310+
use(i); // BAD [NOT DETECTED]: may not have written `*ptr_i`
311311
}
312312

313313
{
@@ -427,5 +427,5 @@ void scan_and_write() {
427427
void scan_and_static_variable() {
428428
static int i;
429429
scanf("%d", &i);
430-
use(i); // GOOD [FALSE POSITIVE]: static variables are always 0-initialized
430+
use(i); // GOOD: static variables are always 0-initialized
431431
}

0 commit comments

Comments
 (0)