Skip to content

Python: Support integer subscripts in the API graph #15497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 42 additions & 4 deletions python/ql/lib/semmle/python/ApiGraphs.qll
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,11 @@ module API {
* Gets a node representing a subscript of this node.
* For example `obj[x]` is a subscript of `obj`.
*/
Node getASubscript() { result = this.getASuccessor(Label::subscript()) }
Node getASubscript() {
result = this.getASuccessor(Label::subscript())
or
result = this.getASuccessor(Label::intSubscript(_))
}

/**
* Gets a node representing an index of a subscript of this node.
Expand All @@ -261,6 +265,25 @@ module API {
)
}

/**
 * Gets a node representing the subscript of this node whose index is the
 * statically known integer `i`.
 *
 * This covers subscripts labeled with the integer index `i` as well as
 * subscripts whose index expression is an integer literal with value `i`.
 * In the snippet below, the string `'value'` is therefore found as subscript
 * `1` of both `a` and `b`:
 * ```py
 * a[1] = 'value'
 * b = ['list', 'value']
 * ```
 */
Node getIntSubscript(int i) {
  result = this.getASuccessor(Label::intSubscript(i))
  or
  exists(API::Node idx |
    idx.getAValueReachingSink().asExpr().(PY::IntegerLiteral).getValue() = i and
    result = this.getSubscriptAt(idx)
  )
}

/**
* Gets a node representing a subscript of this node at index `index`.
*/
Expand Down Expand Up @@ -758,9 +781,9 @@ module API {
// TODO: once convenient, this should be done at a higher level than the AST,
// at least at the CFG layer, to take splitting into account.
// Also consider `SequenceNode` for generality.
exists(PY::List list | list = pred.(DataFlow::ExprNode).getNode().getNode() |
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getAnElt() and
lbl = Label::subscript()
exists(PY::List list, int index | list = pred.(DataFlow::ExprNode).getNode().getNode() |
rhs.(DataFlow::ExprNode).getNode().getNode() = list.getElt(index) and
lbl = Label::intSubscript(index)
)
or
exists(PY::CallableExpr fn | fn = pred.(DataFlow::ExprNode).getNode().getNode() |
Expand Down Expand Up @@ -1068,6 +1091,7 @@ module API {
MkLabelAwait() or
MkLabelSubscript() or
MkLabelIndex() or
MkLabelIntSubscript(int index) { exists(PY::List l | exists(l.getElt(index))) } or
MkLabelEntryPoint(EntryPoint ep)

/** A label for a module. */
Expand Down Expand Up @@ -1148,6 +1172,17 @@ module API {
override string toString() { result = "getASubscript()" }
}

/** An edge label for the subscript of a sequence/mapping at a fixed integer index. */
class LabelIntSubscript extends ApiLabel, MkLabelIntSubscript {
  int index;

  LabelIntSubscript() { this = MkLabelIntSubscript(index) }

  /** Gets the integer index carried by this label. */
  int getIndex() { result = index }

  override string toString() { result = "getIntSubscript(" + this.getIndex().toString() + ")" }
}

/** A label that gets the index of a subscript. */
class LabelIndex extends ApiLabel, MkLabelIndex {
override string toString() { result = "getIndex()" }
Expand Down Expand Up @@ -1201,6 +1236,9 @@ module API {
/** Gets the `subscript` edge label, the label for a subscript such as `obj[x]`. */
LabelSubscript subscript() { any() }

/** Gets the `intSubscript` edge label for the integer index `index`. */
LabelIntSubscript intSubscript(int index) { index = result.getIndex() }

/** Gets the `index` edge label. */
LabelIndex index() { any() }

Expand Down
19 changes: 19 additions & 0 deletions python/ql/src/meta/StdLib/AllStdLibCalls.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import python
private import semmle.python.dataflow.new.internal.DataFlowDispatch

/**
 * Holds if the data-flow call graph resolves `call` to `callable`, that is,
 * if some viable callable for the data-flow call at `call` has `callable`
 * as its scope.
 */
predicate resolvedCall(CallNode call, Function callable) {
  exists(DataFlowCall dfCall, DataFlowCallable target |
    dfCall.getNode() = call and
    target = viableCallable(dfCall) and
    target.getScope() = callable
  )
}

// Lists the standard-library functions (other than `__init__`) that are the
// resolved target of at least one call located outside the standard library.
from Function f, CallNode call, string name
where
  name = f.getName() and
  name != "__init__" and
  f.getLocation().getFile().inStdlib() and
  not call.getLocation().getFile().inStdlib() and
  resolvedCall(call, f)
select name, f.getScope()
136 changes: 136 additions & 0 deletions python/ql/src/meta/StdLib/FindUses.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
import python
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking

/** Holds if the file containing `node` is reported as part of the standard library. */
pragma[inline]
predicate inStdLib(DataFlow::Node node) {
  exists(File file | file = node.getLocation().getFile() | file.inStdlib())
}

/**
 * Gets a description of how `nodeFrom` reaches `nodeTo`:
 * - `"local"` if there is local data flow from `nodeFrom` to `nodeTo`;
 * - `"taint"` if there is no local data flow, but there is local taint flow,
 *   a single additional taint step, or an additional taint step that has
 *   local taint flow leading into and out of it;
 * - `"no"` otherwise.
 */
pragma[inline]
string stepsTo(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
  if DataFlow::localFlow(nodeFrom, nodeTo)
  then result = "local"
  else
    if
      exists(
        TaintTracking::AdditionalTaintStep extra, DataFlow::Node stepIn, DataFlow::Node stepOut
      |
        extra.step(stepIn, stepOut) and
        TaintTracking::localTaint(nodeFrom, stepIn) and
        TaintTracking::localTaint(stepOut, nodeTo)
      )
      or
      any(TaintTracking::AdditionalTaintStep extra).step(nodeFrom, nodeTo)
      or
      TaintTracking::localTaint(nodeFrom, nodeTo)
    then result = "taint"
    else result = "no"
}

/**
 * A query, identified by its name, whose flow paths are inspected for calls
 * that pass data from outside the standard library into a standard-library
 * function.
 *
 * Each subclass supplies the relevant subpaths of one query via `subpath`.
 */
abstract class EntryPointsByQuery extends string {
  bindingset[this]
  EntryPointsByQuery() { any() }

  /**
   * Holds if the query has a subpath relating `argument`, `parameter` and
   * `outNode`.
   */
  abstract predicate subpath(
    DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
  );

  /**
   * Holds if `argument`, which is not in the standard library, is related by
   * `subpath` to a standard-library parameter named `parameterName` and to
   * `outNode`.
   *
   * `functionName` is `<short file name>:<function name>` for the function
   * owning the parameter, and `alreadyModeled` is the result of
   * `stepsTo(argument, outNode)`, i.e. `"local"`, `"taint"` or `"no"`.
   */
  predicate entryPoint(
    DataFlow::Node argument, string parameterName, string functionName, DataFlow::Node outNode,
    string alreadyModeled
  ) {
    exists(DataFlow::ParameterNode parameter, Function function |
      parameterName = parameter.getParameter().getName() and
      functionName = function.getLocation().getFile().getShortName() + ":" + function.getName()
    |
      this.subpath(argument, parameter, outNode) and
      not inStdLib(argument) and
      inStdLib(parameter) and
      function = parameter.getScope() and
      alreadyModeled = stepsTo(argument, outNode)
    )
  }
}

/** Provides the `EntryPointsByQuery` instance for the regex injection query. */
module EntryPointsForRegexInjectionQuery {
  private import semmle.python.security.dataflow.RegexInjectionQuery

  module Flow = RegexInjectionFlow;

  private import Flow::PathGraph

  /** The entry points derived from the subpaths of `RegexInjectionFlow`. */
  private class EntryPointsForRegexInjectionQuery extends EntryPointsByQuery {
    EntryPointsForRegexInjectionQuery() { this = "RegexInjectionQuery" }

    override predicate subpath(
      DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
    ) {
      exists(Flow::PathNode argPath, Flow::PathNode paramPath, Flow::PathNode outPath |
        argPath.getNode() = argument and
        paramPath.getNode() = parameter and
        outPath.getNode() = outNode and
        subpaths(argPath, paramPath, _, outPath)
      )
    }
  }
}

/** Provides the `EntryPointsByQuery` instance for the unsafe shell command construction query. */
module EntryPointsForUnsafeShellCommandConstructionQuery {
  private import semmle.python.security.dataflow.UnsafeShellCommandConstructionQuery

  module Flow = UnsafeShellCommandConstructionFlow;

  private import Flow::PathGraph

  /** The entry points derived from the subpaths of `UnsafeShellCommandConstructionFlow`. */
  private class EntryPointsForUnsafeShellCommandConstructionQuery extends EntryPointsByQuery {
    EntryPointsForUnsafeShellCommandConstructionQuery() {
      this = "UnsafeShellCommandConstructionQuery"
    }

    override predicate subpath(
      DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
    ) {
      exists(Flow::PathNode argPath, Flow::PathNode paramPath, Flow::PathNode outPath |
        argPath.getNode() = argument and
        paramPath.getNode() = parameter and
        outPath.getNode() = outNode and
        subpaths(argPath, paramPath, _, outPath)
      )
    }
  }
}

/** Provides the `EntryPointsByQuery` instance for the polynomial ReDoS query. */
module EntryPointsForPolynomialReDoSQuery {
  private import semmle.python.security.dataflow.PolynomialReDoSQuery

  module Flow = PolynomialReDoSFlow;

  private import Flow::PathGraph

  /** The entry points derived from the subpaths of `PolynomialReDoSFlow`. */
  private class EntryPointsForPolynomialReDoSQuery extends EntryPointsByQuery {
    EntryPointsForPolynomialReDoSQuery() { this = "PolynomialReDoSQuery" }

    override predicate subpath(
      DataFlow::Node argument, DataFlow::ParameterNode parameter, DataFlow::Node outNode
    ) {
      exists(Flow::PathNode argPath, Flow::PathNode paramPath, Flow::PathNode outPath |
        argPath.getNode() = argument and
        paramPath.getNode() = parameter and
        outPath.getNode() = outNode and
        subpaths(argPath, paramPath, _, outPath)
      )
    }
  }
}

// Reports, per query, the standard-library parameters that receive data from
// outside the standard library where `stepsTo` finds neither local flow nor
// taint flow from the argument to the subpath's output node.
from
  EntryPointsByQuery e, DataFlow::Node argument, string parameter, string functionName,
  DataFlow::Node outNode, string alreadyModeled
where
  e.entryPoint(argument, parameter, functionName, outNode, alreadyModeled) and
  alreadyModeled = "no"
// select e, argument, parameter, functionName, outNode, alreadyModeled
select e, parameter, functionName, alreadyModeled
14 changes: 14 additions & 0 deletions python/ql/test/library-tests/ApiGraphs/py3/test_subscript.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,17 @@ def test_subscript():
mypkg.foo()["qux"] += 42 #$ use=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
mypkg.foo()["qux"] += 42 #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()
mypkg.foo()[mypkg.index] = mypkg.value #$ def=moduleImport("mypkg").getMember("foo").getReturn().getASubscript()

import gradio as gr

# Callback used as the `fn` argument of a button's `click` handler; it
# concatenates both inputs into a greeting string.
def greet(name, surname):
    return "Hello " + name + surname + "!"

# Build a small Gradio UI. The list literal passed as `inputs` is what the
# inline expectation on the `click` call addresses by integer subscript.
with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    surname = gr.Textbox(label="Surname")
    output = gr.Textbox(label="Output Box")
    greet_btn = gr.Button("Greet")
    greet_btn.click(fn=greet, inputs=[name, surname], outputs=output, api_name="greet") #$ def=moduleImport("gradio").getMember("Button").getReturn().getMember("click").getKeywordParameter("inputs").getIntSubscript(1)

demo.launch()