Skip to content

C++: Missing return-value check for scanf-like functions #1076 #10033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
.vs/*
!.vs/VSWorkspaceSettings.json

# VSCode temporaries
.vscode/*.log

# Byte-compiled python files
*.pyc

Expand Down
85 changes: 85 additions & 0 deletions cpp/ql/src/Critical/MissingCheckScanf.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
* @name TODO
* @description TODO
* @kind problem
* @problem.severity warning
* @security-severity TODO
* @precision TODO
* @id cpp/missing-check-scanf
* @tags TODO
*/

import cpp
import semmle.code.cpp.commons.Scanf
import semmle.code.cpp.controlflow.Guards
import semmle.code.cpp.dataflow.DataFlow
import semmle.code.cpp.ir.IR
import semmle.code.cpp.ir.ValueNumbering

// DataFlow::localFlow(DataFlow::parameterNode(src), DataFlow::exprNode(snk))
/*
* Find:
* - all scanf calls
* => ScanfFunctionCall
*
* - relate the value number of each out-arg to:
* 1. the return value of the scanf call, and
* 2. the arg number, post-format, of the out-arg.
* => scanfOutput/3
*
* - Find all accesses to variables with the same
* value number, and if applicable, the guard condition
* of one of their enclosing blocks that ensures
* that the scanf return value is at least the
* associated arg-number
*
* - Combine into data flow tracking from out-arg to use,
* with usage in _other_ scanf calls as barrier.
*/

class ScanfOutput extends Expr {
ScanfFunctionCall call;
int argNum;

ScanfOutput() {
this = call.getArgument(call.getFormatParameterIndex() + argNum) and
argNum >= 1
}

Access getAnAccess() {
exists(Instruction j |
j = getNextInstruction() and
forall(Instruction k | k = getAReset() implies j.getASuccessor+() = k) and
forall(Instruction k | k = getAReuse() implies j.getASuccessor+() = k)
|
result = j.getAst()
)
}

private Instruction getAReset() {
result = getNextInstruction() and
result = any(StoreInstruction s).getDestinationAddress()
}

private Instruction getAReuse() {
result = getNextInstruction() and
exists(Expr e | result.getAst() = e |
e instanceof ScanfOutput
or
e.getParent().(AddressOfExpr) instanceof ScanfOutput
)
}

private Instruction getNextInstruction() {
exists(Instruction i |
i.getUnconvertedResultExpression() = this and
result = valueNumber(i).getAnInstruction() and
i.getASuccessor+() = result
)
}
}

//select any(CallInstruction i | i.getStaticCallTarget() instanceof ScanfFunction).toString()
from ScanfFunctionCall fc
where fc instanceof ExprInVoidContext
select fc, "This is a call to scanf."
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
| test.cpp:23:3:23:7 | call to scanf | This is a call to scanf. |
| test.cpp:39:3:39:7 | call to scanf | This is a call to scanf. |
| test.cpp:56:3:56:7 | call to scanf | This is a call to scanf. |
| test.cpp:70:3:70:8 | call to fscanf | This is a call to scanf. |
| test.cpp:77:3:77:8 | call to sscanf | This is a call to scanf. |
| test.cpp:178:3:178:7 | call to scanf | This is a call to scanf. |
| test.cpp:186:3:186:7 | call to scanf | This is a call to scanf. |
| test.cpp:194:3:194:7 | call to scanf | This is a call to scanf. |
| test.cpp:206:3:206:7 | call to scanf | This is a call to scanf. |
| test.cpp:216:3:216:7 | call to scanf | This is a call to scanf. |
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Critical/MissingCheckScanf.ql
287 changes: 287 additions & 0 deletions cpp/ql/test/query-tests/Critical/MissingCheckScanf/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
typedef struct
{
} FILE;

int scanf(const char *format, ...);
int fscanf(FILE *stream, const char *format, ...);
int sscanf(const char *s, const char *format, ...);

void use(int i);

void set_by_ref(int &i);
void set_by_ptr(int *i);
bool maybe();

FILE *get_a_stream();
const char *get_a_string();

int main()
{
// --- simple cases ---

{
int i;

scanf("%d", &i); // BAD: may not have written `i`
use(i);
}

{
int i;

if (scanf("%d", &i) == 1) // GOOD: checks return value
{
use(i);
}
}

{
int i = 0;

scanf("%d", &i); // BAD. Design choice: already initialized variables shouldn't make a difference.
use(i);
}

{
int i;
use(i);

if (scanf("%d", &i) == 1) // GOOD: only care about uses after scanf call
{
use(i);
}
}

{
int i; // Reused variable

scanf("%d", &i); // BAD
use(i);

if (scanf("%d", &i) == 1) // GOOD
{
use(i);
}
}

// --- different scanf functions ---

{
int i;

fscanf(get_a_stream(), "%d", &i); // BAD: may not have written `i`
use(i);
}

{
int i;

sscanf(get_a_string(), "%d", &i); // BAD: may not have written `i`
use(i);
}

// --- different ways of checking ---

{
int i;

if (scanf("%d", &i) >= 1) // GOOD
{
use(i);
}
}

{
int i;

if (scanf("%d", &i) == 1) // GOOD
{
use(i);
}
}

{
int i;

if (scanf("%d", &i) != 0) // BAD: scanf can return -1 [NOT DETECTED]
{
use(i);
}
}

{
int i;

if (scanf("%d", &i) == 0) // BAD: checks return value incorrectly [NOT DETECTED]
{
use(i);
}
}

{
int r;
int i;

r = scanf("%d", &i); // GOOD

if (r >= 1)
{
use(i);
}
}

{
bool b;
int i;

b = scanf("%d", &i); // BAD [NOT DETECTED]: scanf can return EOF (boolifies true)

if (b >= 1)
{
use(i);
}
}

{
bool b;
int i;

b = scanf("%d", &i); // BAD [NOT DETECTED]

use(i);
}

{
int i, j;

if (scanf("%d %d", &i) >= 2) // GOOD: `j` is not a scanf arg, so out of scope of MissingCheckScanf
{
use(i);
use(j);
}
}

{
int i, j;

if (scanf("%d %d", &i, &j) >= 1) // BAD: checks return value incorrectly [NOT DETECTED]
{
use(i);
use(j);
}
}

// --- different initialization ---

{
int i;
i = 0;

scanf("%d", &i); // BAD
use(i);
}

{
int i;

set_by_ref(i);
scanf("%d", &i); // BAD
use(i);
}

{
int i;

set_by_ptr(&i);
scanf("%d", &i); // BAD
use(i);
}

{
int i;

if (maybe())
{
i = 0;
}

scanf("%d", &i); // BAD: `i` may not have been initialized
use(i);
}

// --- different use ---

{
int i;
int *ptr_i = &i;

scanf("%d", &i); // BAD: may not have written `i`
use(*ptr_i);
}

{
int i;
int *ptr_i = &i;

scanf("%d", ptr_i); // BAD: may not have written `*ptr_i`
use(i);
}

{
int i;
scanf("%d", &i);
i = 42;
use(i); // GOOD [FALSE POSITIVE]
}

// --- weird formatting strings ---

{
int i;

if (scanf("%n %d", &i) >= 1) // GOOD (`%n` does not consume input)
{
use(i);
}
}

{
int i;

if (scanf("%% %d", &i) >= 1) // GOOD (`%%` does not consume input)
{
use(i);
}
}

{
int i;

if (scanf("%*d %d", &i) >= 1) // GOOD (`%*d` does not consume input)
{
use(i);
}
}
}

// Non-local cases:

bool my_scan_int(int &i)
{
return scanf("%d", &i) == 1; // GOOD
}

void my_scan_int_test()
{
int i;

use(i); // GOOD: used before scanf

my_scan_int(i); // BAD [NOT DETECTED]
use(i);

if (my_scan_int(i)) // GOOD
{
use(i);
}
}