Skip to content

Ruby: more type-tracking steps #10650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
Oct 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ab672de
Ruby: strip trailing whitespace in calls.rb test
asgerf Sep 30, 2022
1c484d8
Ruby: add some calls to .each in call graph test
asgerf Sep 30, 2022
a4d4e40
Ruby: Summarize level steps in type tracking
asgerf Sep 29, 2022
0000a7d
Ruby: Summarize load-store steps in type-tracking
asgerf Sep 30, 2022
fbab0f5
Ruby: Evaluate longer summary component stacks
asgerf Sep 30, 2022
5b2d8b0
Ruby: make Array.each a simple summary
asgerf Sep 30, 2022
f75f27d
Ruby: update test
asgerf Sep 30, 2022
c06743a
Ruby: update benign test updates
asgerf Sep 30, 2022
74c3886
Ruby: use getACallSimple in more Array methods
asgerf Sep 30, 2022
8b389fe
Ruby: use getACallSimple in more Hash methods
asgerf Sep 30, 2022
8c43ab6
Ruby: go to local source in load-store steps
asgerf Sep 30, 2022
a7d764d
Ruby: Improve join order when generating edges
asgerf Sep 30, 2022
323abf4
Ruby: Speed up evaluateSummaryComponentStackLocal
asgerf Sep 30, 2022
bd11946
Ruby: support WithoutContent steps in restricted cases
asgerf Sep 30, 2022
9302271
Ruby: Hack special-casing of hash literals
asgerf Oct 1, 2022
00e52ad
Ruby: add type-tracking variant of hash-flow test
asgerf Oct 1, 2022
fd9c1e4
Ruby: filter out obvious module 'prepend' calls
asgerf Oct 2, 2022
ff4ce4a
Ruby: use Element[n..] tokens in inject and reduce
asgerf Oct 2, 2022
c220f4e
Ruby: prune unusable summaries earlier
asgerf Oct 2, 2022
6e7aea8
Ruby: update benign test output
asgerf Oct 3, 2022
96711b2
Ruby: improve join order in trackInstanceRec
asgerf Oct 1, 2022
94d41b9
Ruby: add hook for adding type-tracking steps
asgerf Oct 3, 2022
3ccc3a2
Ruby: move special treatment of Hash.[] into Hash.qll
asgerf Oct 3, 2022
b6231e8
Ruby: do not treat WithoutElement[0..!] as a type filter
asgerf Oct 3, 2022
28f4dff
Python: sync
asgerf Oct 3, 2022
9485940
Ruby: share type-tracking test with array test
asgerf Oct 3, 2022
4c19d2d
Ruby: make getAStaticHashCall private again
asgerf Oct 5, 2022
a9a99c5
Ruby: nomagic on unary hasAdjacentTypeCheckedReads
asgerf Oct 5, 2022
f5f351e
Ruby: make flowsToLoadStoreStep private
asgerf Oct 5, 2022
93e8434
Ruby: fix content restriction in type trackers
asgerf Oct 5, 2022
8b7ec20
Merge branch 'main' into rb/summarize-more
asgerf Oct 5, 2022
6f74a52
Merge branch 'main' into rb/summarize-more
asgerf Oct 5, 2022
7cf969f
Ruby: remove mention of PairValueContent
asgerf Oct 5, 2022
f664a77
Ruby: ensure Hash flow works again
asgerf Oct 5, 2022
ab6e488
Python: sync
asgerf Oct 5, 2022
c9c3698
Ruby: address review comments
asgerf Oct 5, 2022
decd4c9
Ruby: update type tracking test
asgerf Oct 5, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ private module Cached {
ReturnStep() or
StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
basicLoadStoreStep(_, _, load, store)
} or
WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
JumpStep()

cached
Expand All @@ -25,7 +30,11 @@ private module Cached {
// We can't rely on `basicStoreStep` since `startInContent` might be used with
// a content that has no corresponding store.
exists(TypeTrackerContent loadContents |
basicLoadStep(_, _, loadContents) and
(
basicLoadStep(_, _, loadContents)
or
basicLoadStoreStep(_, _, loadContents, _)
) and
compatibleContents(content, loadContents)
)
}
Expand All @@ -37,7 +46,11 @@ private module Cached {
or
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
exists(TypeTrackerContent storeContent |
basicStoreStep(_, _, storeContent) and
(
basicStoreStep(_, _, storeContent)
or
basicLoadStoreStep(_, _, _, storeContent)
) and
compatibleContents(storeContent, content)
)
}
Expand All @@ -61,6 +74,14 @@ private module Cached {
or
step = JumpStep() and
result = MkTypeTracker(false, currentContents)
or
exists(ContentFilter filter | result = tt |
step = WithContent(filter) and
currentContents = filter.getAMatchingContent()
or
step = WithoutContent(filter) and
not currentContents = filter.getAMatchingContent()
)
)
or
exists(TypeTrackerContent storeContents, boolean hasCall |
Expand All @@ -75,6 +96,16 @@ private module Cached {
tt = noContentTypeTracker(hasCall) and
result = MkTypeTracker(hasCall, storeContents)
)
or
exists(
TypeTrackerContent currentContent, TypeTrackerContent store, TypeTrackerContent load,
boolean hasCall
|
step = LoadStoreStep(pragma[only_bind_into](load), pragma[only_bind_into](store)) and
compatibleContents(pragma[only_bind_into](currentContent), load) and
tt = MkTypeTracker(pragma[only_bind_into](hasCall), currentContent) and
result = MkTypeTracker(pragma[only_bind_out](hasCall), store)
)
}

pragma[nomagic]
Expand All @@ -96,6 +127,14 @@ private module Cached {
or
step = JumpStep() and
result = MkTypeBackTracker(false, content)
or
exists(ContentFilter filter | result = tbt |
step = WithContent(filter) and
content = filter.getAMatchingContent()
or
step = WithoutContent(filter) and
not content = filter.getAMatchingContent()
)
)
or
exists(TypeTrackerContent loadContents, boolean hasReturn |
Expand All @@ -110,6 +149,16 @@ private module Cached {
tbt = noContentTypeBackTracker(hasReturn) and
result = MkTypeBackTracker(hasReturn, loadContents)
)
or
exists(
TypeTrackerContent currentContent, TypeTrackerContent store, TypeTrackerContent load,
boolean hasCall
|
step = LoadStoreStep(pragma[only_bind_into](load), pragma[only_bind_into](store)) and
compatibleContents(store, pragma[only_bind_into](currentContent)) and
tbt = MkTypeBackTracker(pragma[only_bind_into](hasCall), currentContent) and
result = MkTypeBackTracker(pragma[only_bind_out](hasCall), load)
)
}

/**
Expand Down Expand Up @@ -146,6 +195,19 @@ private module Cached {
or
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
)
or
exists(TypeTrackerContent loadContent, TypeTrackerContent storeContent |
flowsToLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent) and
summary = LoadStoreStep(loadContent, storeContent)
)
or
exists(ContentFilter filter |
basicWithContentStep(nodeFrom, nodeTo, filter) and
summary = WithContent(filter)
or
basicWithoutContentStep(nodeFrom, nodeTo, filter) and
summary = WithoutContent(filter)
)
}

cached
Expand Down Expand Up @@ -190,6 +252,18 @@ private predicate flowsToStoreStep(
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
}

/**
* Holds if `loadContent` is loaded from `nodeFrom` and written to `storeContent` of `nodeTo`.
*/
private predicate flowsToLoadStoreStep(
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent loadContent,
TypeTrackerContent storeContent
) {
exists(Node obj |
nodeTo.flowsTo(obj) and basicLoadStoreStep(nodeFrom, obj, loadContent, storeContent)
)
}

/**
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
*
Expand All @@ -208,6 +282,11 @@ class StepSummary extends TStepSummary {
or
exists(TypeTrackerContent content | this = LoadStep(content) | result = "load " + content)
or
exists(TypeTrackerContent load, TypeTrackerContent store |
this = LoadStoreStep(load, store) and
result = "load-store " + load + " -> " + store
)
or
this instanceof JumpStep and result = "jump"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ class TypeTrackerContent extends OptionalTypeTrackerContent {
/** Gets the content string representing no value. */
OptionalTypeTrackerContent noContent() { result = "" }

/**
* A label to use for `WithContent` and `WithoutContent` steps, restricting
* which `ContentSet` may pass through. Not currently used in Python.
*/
class ContentFilter extends Unit {
TypeTrackerContent getAMatchingContent() { none() }
}

pragma[inline]
predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent loadContent) {
storeContent = loadContent
Expand Down Expand Up @@ -110,6 +118,23 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
)
}

/**
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
*/
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
none()
}

/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
*/
predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { none() }

/**
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
*/
predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { none() }

/**
* A utility class that is equivalent to `boolean` but does not require type joining.
*/
Expand Down
15 changes: 10 additions & 5 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,11 @@ private predicate localFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo,
summary.toString() = "level"
}

pragma[nomagic]
private predicate hasAdjacentTypeCheckedReads(DataFlow::Node node) {
hasAdjacentTypeCheckedReads(_, _, node.asExpr(), _)
}

/**
* We exclude steps into `self` parameters and type checked variables. For those,
* we instead rely on the type of the enclosing module resp. the type being checked
Expand All @@ -629,12 +634,12 @@ private predicate localFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo,
pragma[nomagic]
private DataFlow::Node trackInstanceRec(Module tp, TypeTracker t, boolean exact, StepSummary summary) {
exists(DataFlow::Node mid | mid = trackInstance(tp, exact, t) |
StepSummary::smallstep(mid, result, summary)
StepSummary::smallstep(mid, result, summary) and
not result instanceof SelfParameterNode
or
localFlowStep(mid, result, summary)
) and
not result instanceof SelfParameterNode and
not hasAdjacentTypeCheckedReads(_, _, result.asExpr(), _)
localFlowStep(mid, result, summary) and
not hasAdjacentTypeCheckedReads(result)
)
}

pragma[nomagic]
Expand Down
50 changes: 28 additions & 22 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,7 @@ private module Cached {

cached
predicate isLocalSourceNode(Node n) {
n instanceof ParameterNode and
not n instanceof SynthHashSplatParameterNode
n instanceof ParameterNode
or
// Expressions that can't be reached from another entry definition or expression
n instanceof ExprNode and
Expand All @@ -381,7 +380,7 @@ private module Cached {
n instanceof SynthReturnNode
or
// Needed for stores in type tracking
TypeTrackerSpecific::postUpdateStoreStep(_, n, _)
TypeTrackerSpecific::storeStepIntoSourceNode(_, n, _)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I belive we should remove

not n instanceof SynthHashSplatParameterNode

above as well.

}

cached
Expand Down Expand Up @@ -1010,6 +1009,31 @@ private ContentSet getKeywordContent(string name) {
)
}

/**
* Subset of `storeStep` that should be shared with type-tracking.
*/
predicate storeStepCommon(Node node1, ContentSet c, Node node2) {
// Wrap all key-value arguments in a synthesized hash-splat argument node
exists(CfgNodes::ExprNodes::CallCfgNode call | node2 = TSynthHashSplatArgumentNode(call) |
// symbol key
exists(ArgumentPosition keywordPos, string name |
node1.asExpr().(Argument).isArgumentOf(call, keywordPos) and
keywordPos.isKeyword(name) and
c = getKeywordContent(name)
)
or
// non-symbol key
exists(CfgNodes::ExprNodes::PairCfgNode pair, CfgNodes::ExprCfgNode key, ConstantValue cv |
node1.asExpr() = pair.getValue() and
pair = call.getAnArgument() and
key = pair.getKey() and
cv = key.getConstantValue() and
not cv.isSymbol(_) and
c.isSingleton(TKnownElementContent(cv))
)
)
}

/**
* Holds if data can flow from `node1` to `node2` via an assignment to
* content `c`.
Expand Down Expand Up @@ -1040,25 +1064,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
or
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2)
or
// Wrap all key-value arguments in a synthesized hash-splat argument node
exists(CfgNodes::ExprNodes::CallCfgNode call | node2 = TSynthHashSplatArgumentNode(call) |
// symbol key
exists(ArgumentPosition keywordPos, string name |
node1.asExpr().(Argument).isArgumentOf(call, keywordPos) and
keywordPos.isKeyword(name) and
c = getKeywordContent(name)
)
or
// non-symbol key
exists(CfgNodes::ExprNodes::PairCfgNode pair, CfgNodes::ExprCfgNode key, ConstantValue cv |
node1.asExpr() = pair.getValue() and
pair = call.getAnArgument() and
key = pair.getKey() and
cv = key.getConstantValue() and
not cv.isSymbol(_) and
c.isSingleton(TKnownElementContent(cv))
)
)
storeStepCommon(node1, c, node2)
}

/**
Expand Down
2 changes: 1 addition & 1 deletion ruby/ql/lib/codeql/ruby/frameworks/Core.qll
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private class SplatSummary extends SummarizedCallable {
private class HashSplatSummary extends SummarizedCallable {
HashSplatSummary() { this = "**(hash-splat)" }

override HashSplatExpr getACall() { any() }
override HashSplatExpr getACallSimple() { any() }

override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
input = "Argument[self].WithElement[any]" and
Expand Down
Loading