Skip to content

Commit 6b13a8c

Browse files
authored
Merge pull request #15504 from MathiasVP/block-summary-flow-out-of-strdup-and-friends
C++: Block summary flow through `strdup` and friends
2 parents f631c01 + 439d3d2 commit 6b13a8c

File tree

9 files changed

+234
-17
lines changed

9 files changed

+234
-17
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ private import SsaInternals as Ssa
77
private import DataFlowImplCommon as DataFlowImplCommon
88
private import codeql.util.Unit
99
private import Node0ToString
10+
private import ModelUtil
11+
private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as IO
12+
private import semmle.code.cpp.models.interfaces.DataFlow as DF
1013

1114
cached
1215
private module Cached {
@@ -1178,6 +1181,19 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
11781181
)
11791182
}
11801183

1184+
/**
 * Holds if there is a call whose static call target is `target` such that
 * `node1` is `callInput(call, input)`, `node2` is `callOutput(call, output)`,
 * and `target` declares data flow from `input` to `output` through
 * `hasDataFlow`.
 */
pragma[nomagic]
1185+
private predicate isInputOutput(
1186+
DF::DataFlowFunction target, Node node1, Node node2, IO::FunctionInput input,
1187+
IO::FunctionOutput output
1188+
) {
1189+
exists(CallInstruction call |
1190+
node1 = callInput(call, input) and
1191+
node2 = callOutput(call, output) and
1192+
call.getStaticCallTarget() = target and
1193+
target.hasDataFlow(input, output)
1194+
)
1195+
}
1196+
11811197
/**
11821198
* Holds if the data-flow step from `node1` to `node2` can be used to
11831199
* determine where side-effects may return from a callable.
@@ -1189,6 +1205,11 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
11891205
* int x = *p;
11901206
* ```
11911207
* does not preserve the identity of `*p`.
1208+
*
1209+
* Similarly, a function that copies the contents of a string into a new location
1210+
* also does not preserve the identity. For example, `strdup(p)` does not
1211+
* preserve the identity of `*p` (since it allocates new storage and copies
1212+
* the string into the new storage).
11921213
*/
11931214
bindingset[node1, node2]
11941215
pragma[inline_late]
@@ -1225,7 +1246,16 @@ predicate validParameterAliasStep(Node node1, Node node2) {
12251246
not exists(Operand operand |
12261247
node1.asOperand() = operand and
12271248
node2.asInstruction().(StoreInstruction).getSourceValueOperand() = operand
1249+
) and
1250+
(
1251+
// Either this is not a modeled flow.
1252+
not isInputOutput(_, node1, node2, _, _)
1253+
or
1254+
exists(DF::DataFlowFunction target, IO::FunctionInput input, IO::FunctionOutput output |
1255+
// Or it is a modeled flow and there's `*input` to `*output` flow
1256+
isInputOutput(target, node1, node2, input.getIndirectionInput(), output.getIndirectionOutput()) and
1257+
// and in that case there should also be `input` to `output` flow
1258+
target.hasDataFlow(input, output)
1259+
)
12281260
)
1229-
// TODO: Also block flow through models that don't preserve identity such
1230-
// as `strdup`.
12311261
}

cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ private newtype TFunctionInput =
2323
class FunctionInput extends TFunctionInput {
2424
abstract string toString();
2525

26+
/**
27+
* INTERNAL: Do not use.
28+
*
29+
* Gets the `FunctionInput` that represents the indirection of this input,
30+
* if any.
31+
*/
32+
FunctionInput getIndirectionInput() { none() }
33+
2634
/**
2735
* Holds if this is the input value of the parameter with index `index`.
2836
*
@@ -226,6 +234,8 @@ class InParameter extends FunctionInput, TInParameter {
226234
ParameterIndex getIndex() { result = index }
227235

228236
override predicate isParameter(ParameterIndex i) { i = index }
237+
238+
override FunctionInput getIndirectionInput() { result = TInParameterDeref(index, 1) }
229239
}
230240

231241
/**
@@ -257,6 +267,10 @@ class InParameterDeref extends FunctionInput, TInParameterDeref {
257267
override predicate isParameterDeref(ParameterIndex i, int indirection) {
258268
i = index and indirectionIndex = indirection
259269
}
270+
271+
override FunctionInput getIndirectionInput() {
272+
result = TInParameterDeref(index, indirectionIndex + 1)
273+
}
260274
}
261275

262276
/**
@@ -275,6 +289,8 @@ class InQualifierObject extends FunctionInput, TInQualifierObject {
275289
override string toString() { result = "InQualifierObject" }
276290

277291
override predicate isQualifierObject() { any() }
292+
293+
override FunctionInput getIndirectionInput() { none() }
278294
}
279295

280296
/**
@@ -293,6 +309,8 @@ class InQualifierAddress extends FunctionInput, TInQualifierAddress {
293309
override string toString() { result = "InQualifierAddress" }
294310

295311
override predicate isQualifierAddress() { any() }
312+
313+
override FunctionInput getIndirectionInput() { result = TInQualifierObject() }
296314
}
297315

298316
/**
@@ -321,6 +339,8 @@ class InReturnValueDeref extends FunctionInput, TInReturnValueDeref {
321339
override string toString() { result = "InReturnValueDeref" }
322340

323341
override predicate isReturnValueDeref() { any() }
342+
343+
override FunctionInput getIndirectionInput() { none() }
324344
}
325345

326346
private newtype TFunctionOutput =
@@ -340,6 +360,14 @@ private newtype TFunctionOutput =
340360
class FunctionOutput extends TFunctionOutput {
341361
abstract string toString();
342362

363+
/**
364+
* INTERNAL: Do not use.
365+
*
366+
* Gets the `FunctionOutput` that represents the indirection of this output,
367+
* if any.
368+
*/
369+
FunctionOutput getIndirectionOutput() { none() }
370+
343371
/**
344372
* Holds if this is the output value pointed to by a pointer parameter to a function, or the
345373
* output value referred to by a reference parameter to a function, where the parameter has
@@ -512,6 +540,10 @@ class OutParameterDeref extends FunctionOutput, TOutParameterDeref {
512540
override predicate isParameterDeref(ParameterIndex i, int ind) {
513541
i = index and ind = indirectionIndex
514542
}
543+
544+
override FunctionOutput getIndirectionOutput() {
545+
result = TOutParameterDeref(index, indirectionIndex + 1)
546+
}
515547
}
516548

517549
/**
@@ -530,6 +562,8 @@ class OutQualifierObject extends FunctionOutput, TOutQualifierObject {
530562
override string toString() { result = "OutQualifierObject" }
531563

532564
override predicate isQualifierObject() { any() }
565+
566+
override FunctionOutput getIndirectionOutput() { none() }
533567
}
534568

535569
/**
@@ -552,6 +586,8 @@ class OutReturnValue extends FunctionOutput, TOutReturnValue {
552586
override string toString() { result = "OutReturnValue" }
553587

554588
override predicate isReturnValue() { any() }
589+
590+
override FunctionOutput getIndirectionOutput() { result = TOutReturnValueDeref(1) }
555591
}
556592

557593
/**
@@ -571,11 +607,19 @@ class OutReturnValue extends FunctionOutput, TOutReturnValue {
571607
* of `getInt()` is neither a pointer nor a reference.
572608
*/
573609
class OutReturnValueDeref extends FunctionOutput, TOutReturnValueDeref {
610+
int indirectionIndex;
611+
612+
OutReturnValueDeref() { this = TOutReturnValueDeref(indirectionIndex) }
613+
574614
override string toString() { result = "OutReturnValueDeref" }
575615

576616
override predicate isReturnValueDeref() { any() }
577617

578-
override predicate isReturnValueDeref(int indirectionIndex) {
579-
this = TOutReturnValueDeref(indirectionIndex)
618+
override predicate isReturnValueDeref(int indirectionIndex_) {
619+
indirectionIndex = indirectionIndex_
620+
}
621+
622+
override FunctionOutput getIndirectionOutput() {
623+
result = TOutReturnValueDeref(indirectionIndex + 1)
580624
}
581625
}

cpp/ql/test/library-tests/dataflow/dataflow-tests/TestBase.qll

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,53 @@ module IRTest {
4949
import semmle.code.cpp.ir.dataflow.DataFlow
5050
private import semmle.code.cpp.ir.IR
5151
private import semmle.code.cpp.controlflow.IRGuards
52+
private import semmle.code.cpp.models.interfaces.DataFlow
53+
54+
/**
 * Gets `true` if `s` is the string `"1"` and `false` if `s` is the string
 * `"0"`. Has no result for any other string.
 */
boolean isOne(string s) {
55+
s = "1" and result = true
56+
or
57+
s = "0" and result = false
58+
}
59+
60+
/**
61+
* A model of a test function called `strdup_ptr_xyz` where `x, y, z in {0, 1}`.
62+
* `x` is 1 if there's flow from the argument to the function return,
63+
* `y` is 1 if there's flow from the first indirection of the argument to
64+
* the first indirection of the function return, and
65+
* `z` is 1 if there's flow from the second indirection of the argument to
66+
* the second indirection of the function return.
67+
*/
68+
class StrDupPtr extends DataFlowFunction {
69+
// `true` if the first digit of the function name is `1`: models flow from
// parameter 0 to the return value.
boolean argToReturnFlow;
70+
// `true` if the second digit of the function name is `1`: models flow from
// the first indirection of parameter 0 to the first indirection of the
// return value.
boolean argIndToReturnInd;
71+
// `true` if the third digit of the function name is `1`: models flow from
// the second indirection of parameter 0 to the second indirection of the
// return value.
boolean argIndInToReturnIndInd;
72+
73+
StrDupPtr() {
74+
exists(string r |
75+
r = "strdup_ptr_([01])([01])([01])" and
76+
argToReturnFlow = isOne(this.getName().regexpCapture(r, 1)) and
77+
argIndToReturnInd = isOne(this.getName().regexpCapture(r, 2)) and
78+
argIndInToReturnIndInd = isOne(this.getName().regexpCapture(r, 3))
79+
)
80+
}
81+
82+
/**
83+
* Holds for each flow enabled by this function's flags: `ptr` to `return`, `*ptr` to `*return`, and `**ptr` to `**return`.
84+
*/
85+
override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
86+
argToReturnFlow = true and
87+
input.isParameter(0) and
88+
output.isReturnValue()
89+
or
90+
argIndToReturnInd = true and
91+
input.isParameterDeref(0, 1) and
92+
output.isReturnValueDeref(1)
93+
or
94+
argIndInToReturnIndInd = true and
95+
input.isParameterDeref(0, 2) and
96+
output.isReturnValueDeref(2)
97+
}
98+
}
5299

53100
/**
54101
* A `BarrierGuard` that stops flow to all occurrences of `x` within statement

cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-consistency.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ postIsInSameCallable
2424
reverseRead
2525
argHasPostUpdate
2626
| flowOut.cpp:55:14:55:16 | * ... | ArgumentNode is missing PostUpdateNode. |
27+
| flowOut.cpp:185:8:185:9 | * ... | ArgumentNode is missing PostUpdateNode. |
2728
| lambdas.cpp:18:7:18:7 | a | ArgumentNode is missing PostUpdateNode. |
2829
| lambdas.cpp:25:2:25:2 | b | ArgumentNode is missing PostUpdateNode. |
2930
| lambdas.cpp:32:2:32:2 | c | ArgumentNode is missing PostUpdateNode. |
@@ -64,6 +65,8 @@ postWithInFlow
6465
| flowOut.cpp:90:3:90:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
6566
| flowOut.cpp:90:4:90:4 | q [inner post update] | PostUpdateNode should not be the target of local flow. |
6667
| flowOut.cpp:101:14:101:14 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
68+
| flowOut.cpp:168:3:168:10 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
69+
| flowOut.cpp:168:4:168:10 | toTaint [inner post update] | PostUpdateNode should not be the target of local flow. |
6770
| globals.cpp:13:5:13:19 | flowTestGlobal1 [post update] | PostUpdateNode should not be the target of local flow. |
6871
| globals.cpp:23:5:23:19 | flowTestGlobal2 [post update] | PostUpdateNode should not be the target of local flow. |
6972
| lambdas.cpp:23:3:23:14 | v [post update] | PostUpdateNode should not be the target of local flow. |

cpp/ql/test/library-tests/dataflow/dataflow-tests/dataflow-ir-consistency.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ reverseRead
2020
argHasPostUpdate
2121
postWithInFlow
2222
| flowOut.cpp:84:3:84:14 | *access to array | PostUpdateNode should not be the target of local flow. |
23+
| flowOut.cpp:111:28:111:31 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
2324
| test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
2425
| test.cpp:391:10:391:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
2526
| test.cpp:400:10:400:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |

cpp/ql/test/library-tests/dataflow/dataflow-tests/flowOut.cpp

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ void modify_copy_via_strdup(char* p) { // $ ast-def=p
7070

7171
void test_modify_copy_via_strdup(char* p) { // $ ast-def=p
7272
modify_copy_via_strdup(p);
73-
sink(*p); // $ SPURIOUS: ir
73+
sink(*p); // clean
7474
}
7575

7676
int* deref(int** p) { // $ ast-def=p
@@ -101,3 +101,103 @@ void test2() {
101101
addtaint2(&p);
102102
sink(*p); // $ ir MISSING: ast
103103
}
104+
105+
using size_t = decltype(sizeof(int));
106+
107+
void* memcpy(void* dest, const void* src, size_t);
108+
109+
void modify_copy_via_memcpy(char* p) { // $ ast-def=p
110+
char* dest;
111+
char* p2 = (char*)memcpy(dest, p, 10);
112+
source_ref(p2);
113+
}
114+
115+
void test_modify_copy_via_memcpy(char* p) { // $ ast-def=p
116+
modify_copy_via_memcpy(p);
117+
sink(*p); // clean
118+
}
119+
120+
// These functions are not from any real database. We add a dataflow model of
121+
// them as part of dataflow library testing.
122+
// `r = strdup_ptr_001(p)` has flow from **p to **r
123+
// `r = strdup_ptr_011(p)` has flow from *p to *r, and **p to **r
124+
// `r = strdup_ptr_111(p)` has flow from p to r, *p to *r, **p to **r
125+
char** strdup_ptr_001(const char** p);
126+
char** strdup_ptr_011(const char** p);
127+
char** strdup_ptr_111(const char** p);
128+
129+
void source_ref_ref(char** toTaint) { // $ ast-def=toTaint ir-def=*toTaint ir-def=**toTaint
130+
// source -> **toTaint
131+
**toTaint = source(true);
132+
}
133+
134+
// This function copies the value of **p into a new location **p2 and then
135+
// taints **p2. Thus, **p does not contain tainted data after returning from
136+
// this function.
137+
void modify_copy_via_strdup_ptr_001(char** p) { // $ ast-def=p
138+
// **p -> **p2
139+
char** p2 = strdup_ptr_001(p);
140+
// source -> **p2
141+
source_ref_ref(p2);
142+
}
143+
144+
void test_modify_copy_via_strdup_001(char** p) { // $ ast-def=p
145+
modify_copy_via_strdup_ptr_001(p);
146+
sink(**p); // clean
147+
}
148+
149+
// This function copies the value of *p into a new location *p2 and then
150+
// taints **p2. Thus, **p contains tainted data after returning from this
151+
// function.
152+
void modify_copy_via_strdup_ptr_011(char** p) { // $ ast-def=p
153+
// **p -> **p2 and *p -> *p2
154+
char** p2 = strdup_ptr_011(p);
155+
// source -> **p2
156+
source_ref_ref(p2);
157+
}
158+
159+
void test_modify_copy_via_strdup_011(char** p) { // $ ast-def=p
160+
modify_copy_via_strdup_ptr_011(p);
161+
sink(**p); // $ ir MISSING: ast
162+
}
163+
164+
char* source(int);
165+
166+
void source_ref_2(char** toTaint) { // $ ast-def=toTaint ir-def=*toTaint ir-def=**toTaint
167+
// source -> *toTaint
168+
*toTaint = source(42);
169+
}
170+
171+
// This function copies the value of p into a new location p2 and then
172+
// taints *p2. Thus, *p contains tainted data after returning from this
173+
// function.
174+
void modify_copy_via_strdup_ptr_111_taint_ind(char** p) { // $ ast-def=p
175+
// **p -> **p2, *p -> *p2, and p -> p2
176+
char** p2 = strdup_ptr_111(p);
177+
// source -> *p2
178+
source_ref_2(p2);
179+
}
180+
181+
void sink(char*);
182+
183+
void test_modify_copy_via_strdup_111_taint_ind(char** p) { // $ ast-def=p
184+
modify_copy_via_strdup_ptr_111_taint_ind(p);
185+
sink(*p); // $ ir MISSING: ast
186+
}
187+
188+
// This function copies the value of p into a new location p2 and then
189+
// taints **p2. Thus, **p contains tainted data after returning from this
190+
// function.
191+
void modify_copy_via_strdup_ptr_111_taint_ind_ind(char** p) { // $ ast-def=p
192+
// **p -> **p2, *p -> *p2, and p -> p2
193+
char** p2 = strdup_ptr_111(p);
194+
// source -> **p2
195+
source_ref_ref(p2);
196+
}
197+
198+
void sink(char*);
199+
200+
void test_modify_copy_via_strdup_111_taint_ind_ind(char** p) { // $ ast-def=p
201+
modify_copy_via_strdup_ptr_111_taint_ind_ind(p);
202+
sink(**p); // $ ir MISSING: ast
203+
}

cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,11 @@ irFlow
174174
| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x |
175175
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:31:9:31:9 | x |
176176
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:61:8:61:11 | access to array |
177-
| flowOut.cpp:8:16:8:23 | call to source | flowOut.cpp:73:8:73:9 | * ... |
178177
| flowOut.cpp:84:18:84:23 | call to source | flowOut.cpp:85:8:85:9 | * ... |
179178
| flowOut.cpp:90:8:90:13 | call to source | flowOut.cpp:102:8:102:9 | * ... |
179+
| flowOut.cpp:131:15:131:20 | call to source | flowOut.cpp:161:8:161:10 | * ... |
180+
| flowOut.cpp:131:15:131:20 | call to source | flowOut.cpp:202:8:202:10 | * ... |
181+
| flowOut.cpp:168:14:168:19 | call to source | flowOut.cpp:185:8:185:9 | * ... |
180182
| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local |
181183
| globals.cpp:13:23:13:28 | call to source | globals.cpp:12:10:12:24 | flowTestGlobal1 |
182184
| globals.cpp:23:23:23:28 | call to source | globals.cpp:19:10:19:24 | flowTestGlobal2 |

0 commit comments

Comments
 (0)