Skip to content

C++: Block summary flow through strdup and friends #15504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ private import SsaInternals as Ssa
private import DataFlowImplCommon as DataFlowImplCommon
private import codeql.util.Unit
private import Node0ToString
private import ModelUtil
private import semmle.code.cpp.models.interfaces.FunctionInputsAndOutputs as IO
private import semmle.code.cpp.models.interfaces.DataFlow as DF

cached
private module Cached {
Expand Down Expand Up @@ -1178,6 +1181,19 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
)
}

pragma[nomagic]
private predicate isInputOutput(
DF::DataFlowFunction target, Node node1, Node node2, IO::FunctionInput input,
IO::FunctionOutput output
) {
exists(CallInstruction call |
node1 = callInput(call, input) and
node2 = callOutput(call, output) and
call.getStaticCallTarget() = target and
target.hasDataFlow(input, output)
)
}

/**
* Holds if the data-flow step from `node1` to `node2` can be used to
* determine where side-effects may return from a callable.
Expand All @@ -1189,6 +1205,11 @@ private int countNumberOfBranchesUsingParameter(SwitchInstruction switch, Parame
* int x = *p;
* ```
* does not preserve the identity of `*p`.
*
* Similarly, a function that copies the contents of a string into a new location
* does not also preserve the identity. For example, `strdup(p)` does not
* preserve the identity of `*p` (since it allocates new storage and copies
* the string into the new storage).
*/
bindingset[node1, node2]
pragma[inline_late]
Expand Down Expand Up @@ -1225,7 +1246,16 @@ predicate validParameterAliasStep(Node node1, Node node2) {
not exists(Operand operand |
node1.asOperand() = operand and
node2.asInstruction().(StoreInstruction).getSourceValueOperand() = operand
) and
(
// Either this is not a modeled flow.
not isInputOutput(_, node1, node2, _, _)
or
exists(DF::DataFlowFunction target, IO::FunctionInput input, IO::FunctionOutput output |
// Or it is a modeled flow and there's `*input` to `*output` flow
isInputOutput(target, node1, node2, input.getIndirectionInput(), output.getIndirectionOutput()) and
// and in that case there should also be `input` to `output` flow
target.hasDataFlow(input, output)
)
)
// TODO: Also block flow through models that don't preserve identity such
// as `strdup`.
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ private newtype TFunctionInput =
class FunctionInput extends TFunctionInput {
abstract string toString();

/**
* INTERNAL: Do not use.
*
* Gets the `FunctionInput` that represents the indirection of this input,
* if any.
*/
FunctionInput getIndirectionInput() { none() }

/**
* Holds if this is the input value of the parameter with index `index`.
*
Expand Down Expand Up @@ -226,6 +234,8 @@ class InParameter extends FunctionInput, TInParameter {
ParameterIndex getIndex() { result = index }

override predicate isParameter(ParameterIndex i) { i = index }

override FunctionInput getIndirectionInput() { result = TInParameterDeref(index, 1) }
}

/**
Expand Down Expand Up @@ -257,6 +267,10 @@ class InParameterDeref extends FunctionInput, TInParameterDeref {
override predicate isParameterDeref(ParameterIndex i, int indirection) {
i = index and indirectionIndex = indirection
}

override FunctionInput getIndirectionInput() {
result = TInParameterDeref(index, indirectionIndex + 1)
}
}

/**
Expand All @@ -275,6 +289,8 @@ class InQualifierObject extends FunctionInput, TInQualifierObject {
override string toString() { result = "InQualifierObject" }

override predicate isQualifierObject() { any() }

override FunctionInput getIndirectionInput() { none() }
}

/**
Expand All @@ -293,6 +309,8 @@ class InQualifierAddress extends FunctionInput, TInQualifierAddress {
override string toString() { result = "InQualifierAddress" }

override predicate isQualifierAddress() { any() }

override FunctionInput getIndirectionInput() { result = TInQualifierObject() }
}

/**
Expand Down Expand Up @@ -321,6 +339,8 @@ class InReturnValueDeref extends FunctionInput, TInReturnValueDeref {
override string toString() { result = "InReturnValueDeref" }

override predicate isReturnValueDeref() { any() }

override FunctionInput getIndirectionInput() { none() }
}

private newtype TFunctionOutput =
Expand All @@ -340,6 +360,14 @@ private newtype TFunctionOutput =
class FunctionOutput extends TFunctionOutput {
abstract string toString();

/**
* INTERNAL: Do not use.
*
* Gets the `FunctionOutput` that represents the indirection of this output,
* if any.
*/
FunctionOutput getIndirectionOutput() { none() }

/**
* Holds if this is the output value pointed to by a pointer parameter to a function, or the
* output value referred to by a reference parameter to a function, where the parameter has
Expand Down Expand Up @@ -512,6 +540,10 @@ class OutParameterDeref extends FunctionOutput, TOutParameterDeref {
override predicate isParameterDeref(ParameterIndex i, int ind) {
i = index and ind = indirectionIndex
}

override FunctionOutput getIndirectionOutput() {
result = TOutParameterDeref(index, indirectionIndex + 1)
}
}

/**
Expand All @@ -530,6 +562,8 @@ class OutQualifierObject extends FunctionOutput, TOutQualifierObject {
override string toString() { result = "OutQualifierObject" }

override predicate isQualifierObject() { any() }

override FunctionOutput getIndirectionOutput() { none() }
}

/**
Expand All @@ -552,6 +586,8 @@ class OutReturnValue extends FunctionOutput, TOutReturnValue {
override string toString() { result = "OutReturnValue" }

override predicate isReturnValue() { any() }

override FunctionOutput getIndirectionOutput() { result = TOutReturnValueDeref(1) }
}

/**
Expand All @@ -571,11 +607,19 @@ class OutReturnValue extends FunctionOutput, TOutReturnValue {
* of `getInt()` is neither a pointer nor a reference.
*/
class OutReturnValueDeref extends FunctionOutput, TOutReturnValueDeref {
int indirectionIndex;

OutReturnValueDeref() { this = TOutReturnValueDeref(indirectionIndex) }

override string toString() { result = "OutReturnValueDeref" }

override predicate isReturnValueDeref() { any() }

override predicate isReturnValueDeref(int indirectionIndex) {
this = TOutReturnValueDeref(indirectionIndex)
override predicate isReturnValueDeref(int indirectionIndex_) {
indirectionIndex = indirectionIndex_
}

override FunctionOutput getIndirectionOutput() {
result = TOutReturnValueDeref(indirectionIndex + 1)
}
}
47 changes: 47 additions & 0 deletions cpp/ql/test/library-tests/dataflow/dataflow-tests/TestBase.qll
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,53 @@ module IRTest {
import semmle.code.cpp.ir.dataflow.DataFlow
private import semmle.code.cpp.ir.IR
private import semmle.code.cpp.controlflow.IRGuards
private import semmle.code.cpp.models.interfaces.DataFlow

boolean isOne(string s) {
s = "1" and result = true
or
s = "0" and result = false
}

/**
* A model of a test function called `strdup_ptr_xyz` where `x, y, z in {0, 1}`.
* `x` is 1 if there's flow from the argument to the function return,
* `y` is 1 if there's flow from the first indirection of the argument to
* the first indirection of the function return, and
* `z` is 1 if there's flow from the second indirection of the argument to
* the second indirection of the function return.
*/
class StrDupPtr extends DataFlowFunction {
boolean argToReturnFlow;
boolean argIndToReturnInd;
boolean argIndInToReturnIndInd;

StrDupPtr() {
exists(string r |
r = "strdup_ptr_([01])([01])([01])" and
argToReturnFlow = isOne(this.getName().regexpCapture(r, 1)) and
argIndToReturnInd = isOne(this.getName().regexpCapture(r, 2)) and
argIndInToReturnIndInd = isOne(this.getName().regexpCapture(r, 3))
)
}

/**
* Flow from `**ptr` to `**return`
*/
override predicate hasDataFlow(FunctionInput input, FunctionOutput output) {
argToReturnFlow = true and
input.isParameter(0) and
output.isReturnValue()
or
argIndToReturnInd = true and
input.isParameterDeref(0, 1) and
output.isReturnValueDeref(1)
or
argIndInToReturnIndInd = true and
input.isParameterDeref(0, 2) and
output.isReturnValueDeref(2)
}
}

/**
* A `BarrierGuard` that stops flow to all occurrences of `x` within statement
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ postIsInSameCallable
reverseRead
argHasPostUpdate
| flowOut.cpp:55:14:55:16 | * ... | ArgumentNode is missing PostUpdateNode. |
| flowOut.cpp:185:8:185:9 | * ... | ArgumentNode is missing PostUpdateNode. |
| lambdas.cpp:18:7:18:7 | a | ArgumentNode is missing PostUpdateNode. |
| lambdas.cpp:25:2:25:2 | b | ArgumentNode is missing PostUpdateNode. |
| lambdas.cpp:32:2:32:2 | c | ArgumentNode is missing PostUpdateNode. |
Expand Down Expand Up @@ -64,6 +65,8 @@ postWithInFlow
| flowOut.cpp:90:3:90:4 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:90:4:90:4 | q [inner post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:101:14:101:14 | p [inner post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:168:3:168:10 | * ... [post update] | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:168:4:168:10 | toTaint [inner post update] | PostUpdateNode should not be the target of local flow. |
| globals.cpp:13:5:13:19 | flowTestGlobal1 [post update] | PostUpdateNode should not be the target of local flow. |
| globals.cpp:23:5:23:19 | flowTestGlobal2 [post update] | PostUpdateNode should not be the target of local flow. |
| lambdas.cpp:23:3:23:14 | v [post update] | PostUpdateNode should not be the target of local flow. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ reverseRead
argHasPostUpdate
postWithInFlow
| flowOut.cpp:84:3:84:14 | *access to array | PostUpdateNode should not be the target of local flow. |
| flowOut.cpp:111:28:111:31 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
| test.cpp:384:10:384:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
| test.cpp:391:10:391:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
| test.cpp:400:10:400:13 | memcpy output argument | PostUpdateNode should not be the target of local flow. |
Expand Down
102 changes: 101 additions & 1 deletion cpp/ql/test/library-tests/dataflow/dataflow-tests/flowOut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void modify_copy_via_strdup(char* p) { // $ ast-def=p

void test_modify_copy_via_strdup(char* p) { // $ ast-def=p
modify_copy_via_strdup(p);
sink(*p); // $ SPURIOUS: ir
sink(*p); // clean
}

int* deref(int** p) { // $ ast-def=p
Expand Down Expand Up @@ -101,3 +101,103 @@ void test2() {
addtaint2(&p);
sink(*p); // $ ir MISSING: ast
}

using size_t = decltype(sizeof(int));

void* memcpy(void* dest, const void* src, size_t);

void modify_copy_via_memcpy(char* p) { // $ ast-def=p
char* dest;
char* p2 = (char*)memcpy(dest, p, 10);
source_ref(p2);
}

void test_modify_copy_via_memcpy(char* p) { // $ ast-def=p
modify_copy_via_memcpy(p);
sink(*p); // clean
}

// These functions from any real database. We add a dataflow model of
// them as part of dataflow library testing.
// `r = strdup_ptr_001`(p) has flow from **p to **r
// `r = strdup_ptr_011`(p) has flow from *p to *r, and **p to **r
// `r = strdup_ptr_111`(p) has flow from p to r, *p to *r, **p to **r
char** strdup_ptr_001(const char** p);
char** strdup_ptr_011(const char** p);
char** strdup_ptr_111(const char** p);

void source_ref_ref(char** toTaint) { // $ ast-def=toTaint ir-def=*toTaint ir-def=**toTaint
// source -> **toTaint
**toTaint = source(true);
}

// This function copies the value of **p into a new location **p2 and then
// taints **p. Thus, **p does not contain tainted data after returning from
// this function.
void modify_copy_via_strdup_ptr_001(char** p) { // $ ast-def=p
// **p -> **p2
char** p2 = strdup_ptr_001(p);
// source -> **p2
source_ref_ref(p2);
}

void test_modify_copy_via_strdup_001(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_001(p);
sink(**p); // clean
}

// This function copies the value of *p into a new location *p2 and then
// taints **p2. Thus, **p contains tainted data after returning from this
// function.
void modify_copy_via_strdup_ptr_011(char** p) { // $ ast-def=p
// **p -> **p2 and *p -> *p2
char** p2 = strdup_ptr_011(p);
// source -> **p2
source_ref_ref(p2);
}

void test_modify_copy_via_strdup_011(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_011(p);
sink(**p); // $ ir MISSING: ast
}

char* source(int);

void source_ref_2(char** toTaint) { // $ ast-def=toTaint ir-def=*toTaint ir-def=**toTaint
// source -> *toTaint
*toTaint = source(42);
}

// This function copies the value of p into a new location p2 and then
// taints *p2. Thus, *p contains tainted data after returning from this
// function.
void modify_copy_via_strdup_ptr_111_taint_ind(char** p) { // $ ast-def=p
// **p -> **p2, *p -> *p2, and p -> p2
char** p2 = strdup_ptr_111(p);
// source -> *p2
source_ref_2(p2);
}

void sink(char*);

void test_modify_copy_via_strdup_111_taint_ind(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_111_taint_ind(p);
sink(*p); // $ ir MISSING: ast
}

// This function copies the value of p into a new location p2 and then
// taints **p2. Thus, **p contains tainted data after returning from this
// function.
void modify_copy_via_strdup_ptr_111_taint_ind_ind(char** p) { // $ ast-def=p
// **p -> **p2, *p -> *p2, and p -> p2
char** p2 = strdup_ptr_111(p);
// source -> **p2
source_ref_ref(p2);
}

void sink(char*);

void test_modify_copy_via_strdup_111_taint_ind_ind(char** p) { // $ ast-def=p
modify_copy_via_strdup_ptr_111_taint_ind_ind(p);
sink(**p); // $ ir MISSING: ast
}
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,11 @@ irFlow
| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x |
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:31:9:31:9 | x |
| flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:61:8:61:11 | access to array |
| flowOut.cpp:8:16:8:23 | call to source | flowOut.cpp:73:8:73:9 | * ... |
| flowOut.cpp:84:18:84:23 | call to source | flowOut.cpp:85:8:85:9 | * ... |
| flowOut.cpp:90:8:90:13 | call to source | flowOut.cpp:102:8:102:9 | * ... |
| flowOut.cpp:131:15:131:20 | call to source | flowOut.cpp:161:8:161:10 | * ... |
| flowOut.cpp:131:15:131:20 | call to source | flowOut.cpp:202:8:202:10 | * ... |
| flowOut.cpp:168:14:168:19 | call to source | flowOut.cpp:185:8:185:9 | * ... |
| globals.cpp:5:17:5:22 | call to source | globals.cpp:6:10:6:14 | local |
| globals.cpp:13:23:13:28 | call to source | globals.cpp:12:10:12:24 | flowTestGlobal1 |
| globals.cpp:23:23:23:28 | call to source | globals.cpp:19:10:19:24 | flowTestGlobal2 |
Expand Down
Loading