Skip to content

Commit 387e575

Browse files
authored
Merge pull request #10650 from asgerf/rb/summarize-more
Ruby: more type-tracking steps
2 parents 7f8bcf7 + decd4c9 commit 387e575

22 files changed

+1454
-668
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ private module Cached {
1414
ReturnStep() or
1515
StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
1616
LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
17+
LoadStoreStep(TypeTrackerContent load, TypeTrackerContent store) {
18+
basicLoadStoreStep(_, _, load, store)
19+
} or
20+
WithContent(ContentFilter filter) { basicWithContentStep(_, _, filter) } or
21+
WithoutContent(ContentFilter filter) { basicWithoutContentStep(_, _, filter) } or
1722
JumpStep()
1823

1924
cached
@@ -25,7 +30,11 @@ private module Cached {
2530
// We can't rely on `basicStoreStep` since `startInContent` might be used with
2631
// a content that has no corresponding store.
2732
exists(TypeTrackerContent loadContents |
28-
basicLoadStep(_, _, loadContents) and
33+
(
34+
basicLoadStep(_, _, loadContents)
35+
or
36+
basicLoadStoreStep(_, _, loadContents, _)
37+
) and
2938
compatibleContents(content, loadContents)
3039
)
3140
}
@@ -37,7 +46,11 @@ private module Cached {
3746
or
3847
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
3948
exists(TypeTrackerContent storeContent |
40-
basicStoreStep(_, _, storeContent) and
49+
(
50+
basicStoreStep(_, _, storeContent)
51+
or
52+
basicLoadStoreStep(_, _, _, storeContent)
53+
) and
4154
compatibleContents(storeContent, content)
4255
)
4356
}
@@ -61,6 +74,14 @@ private module Cached {
6174
or
6275
step = JumpStep() and
6376
result = MkTypeTracker(false, currentContents)
77+
or
78+
exists(ContentFilter filter | result = tt |
79+
step = WithContent(filter) and
80+
currentContents = filter.getAMatchingContent()
81+
or
82+
step = WithoutContent(filter) and
83+
not currentContents = filter.getAMatchingContent()
84+
)
6485
)
6586
or
6687
exists(TypeTrackerContent storeContents, boolean hasCall |
@@ -75,6 +96,16 @@ private module Cached {
7596
tt = noContentTypeTracker(hasCall) and
7697
result = MkTypeTracker(hasCall, storeContents)
7798
)
99+
or
100+
exists(
101+
TypeTrackerContent currentContent, TypeTrackerContent store, TypeTrackerContent load,
102+
boolean hasCall
103+
|
104+
step = LoadStoreStep(pragma[only_bind_into](load), pragma[only_bind_into](store)) and
105+
compatibleContents(pragma[only_bind_into](currentContent), load) and
106+
tt = MkTypeTracker(pragma[only_bind_into](hasCall), currentContent) and
107+
result = MkTypeTracker(pragma[only_bind_out](hasCall), store)
108+
)
78109
}
79110

80111
pragma[nomagic]
@@ -96,6 +127,14 @@ private module Cached {
96127
or
97128
step = JumpStep() and
98129
result = MkTypeBackTracker(false, content)
130+
or
131+
exists(ContentFilter filter | result = tbt |
132+
step = WithContent(filter) and
133+
content = filter.getAMatchingContent()
134+
or
135+
step = WithoutContent(filter) and
136+
not content = filter.getAMatchingContent()
137+
)
99138
)
100139
or
101140
exists(TypeTrackerContent loadContents, boolean hasReturn |
@@ -110,6 +149,16 @@ private module Cached {
110149
tbt = noContentTypeBackTracker(hasReturn) and
111150
result = MkTypeBackTracker(hasReturn, loadContents)
112151
)
152+
or
153+
exists(
154+
TypeTrackerContent currentContent, TypeTrackerContent store, TypeTrackerContent load,
155+
boolean hasCall
156+
|
157+
step = LoadStoreStep(pragma[only_bind_into](load), pragma[only_bind_into](store)) and
158+
compatibleContents(store, pragma[only_bind_into](currentContent)) and
159+
tbt = MkTypeBackTracker(pragma[only_bind_into](hasCall), currentContent) and
160+
result = MkTypeBackTracker(pragma[only_bind_out](hasCall), load)
161+
)
113162
}
114163

115164
/**
@@ -146,6 +195,19 @@ private module Cached {
146195
or
147196
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
148197
)
198+
or
199+
exists(TypeTrackerContent loadContent, TypeTrackerContent storeContent |
200+
flowsToLoadStoreStep(nodeFrom, nodeTo, loadContent, storeContent) and
201+
summary = LoadStoreStep(loadContent, storeContent)
202+
)
203+
or
204+
exists(ContentFilter filter |
205+
basicWithContentStep(nodeFrom, nodeTo, filter) and
206+
summary = WithContent(filter)
207+
or
208+
basicWithoutContentStep(nodeFrom, nodeTo, filter) and
209+
summary = WithoutContent(filter)
210+
)
149211
}
150212

151213
cached
@@ -190,6 +252,18 @@ private predicate flowsToStoreStep(
190252
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
191253
}
192254

255+
/**
256+
* Holds if `loadContent` is loaded from `nodeFrom` and written to `storeContent` of `nodeTo`.
257+
*/
258+
private predicate flowsToLoadStoreStep(
259+
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent loadContent,
260+
TypeTrackerContent storeContent
261+
) {
262+
exists(Node obj |
263+
nodeTo.flowsTo(obj) and basicLoadStoreStep(nodeFrom, obj, loadContent, storeContent)
264+
)
265+
}
266+
193267
/**
194268
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
195269
*
@@ -208,6 +282,11 @@ class StepSummary extends TStepSummary {
208282
or
209283
exists(TypeTrackerContent content | this = LoadStep(content) | result = "load " + content)
210284
or
285+
exists(TypeTrackerContent load, TypeTrackerContent store |
286+
this = LoadStoreStep(load, store) and
287+
result = "load-store " + load + " -> " + store
288+
)
289+
or
211290
this instanceof JumpStep and result = "jump"
212291
}
213292
}

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ class TypeTrackerContent extends OptionalTypeTrackerContent {
2828
/** Gets the content string representing no value. */
2929
OptionalTypeTrackerContent noContent() { result = "" }
3030

31+
/**
32+
* A label to use for `WithContent` and `WithoutContent` steps, restricting
33+
* which `ContentSet` may pass through. Not currently used in Python.
34+
*/
35+
class ContentFilter extends Unit {
36+
TypeTrackerContent getAMatchingContent() { none() }
37+
}
38+
3139
pragma[inline]
3240
predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent loadContent) {
3341
storeContent = loadContent
@@ -110,6 +118,23 @@ predicate basicLoadStep(Node nodeFrom, Node nodeTo, string content) {
110118
)
111119
}
112120

121+
/**
122+
* Holds if the `loadContent` of `nodeFrom` is stored in the `storeContent` of `nodeTo`.
123+
*/
124+
predicate basicLoadStoreStep(Node nodeFrom, Node nodeTo, string loadContent, string storeContent) {
125+
none()
126+
}
127+
128+
/**
129+
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` but block flow of contents matched by `filter` through here.
130+
*/
131+
predicate basicWithoutContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { none() }
132+
133+
/**
134+
* Holds if type-tracking should step from `nodeFrom` to `nodeTo` if inside a content matched by `filter`.
135+
*/
136+
predicate basicWithContentStep(Node nodeFrom, Node nodeTo, ContentFilter filter) { none() }
137+
113138
/**
114139
* A utility class that is equivalent to `boolean` but does not require type joining.
115140
*/

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,11 @@ private predicate localFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo,
620620
summary.toString() = "level"
621621
}
622622

623+
pragma[nomagic]
624+
private predicate hasAdjacentTypeCheckedReads(DataFlow::Node node) {
625+
hasAdjacentTypeCheckedReads(_, _, node.asExpr(), _)
626+
}
627+
623628
/**
624629
* We exclude steps into `self` parameters and type checked variables. For those,
625630
* we instead rely on the type of the enclosing module resp. the type being checked
@@ -629,12 +634,12 @@ private predicate localFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo,
629634
pragma[nomagic]
630635
private DataFlow::Node trackInstanceRec(Module tp, TypeTracker t, boolean exact, StepSummary summary) {
631636
exists(DataFlow::Node mid | mid = trackInstance(tp, exact, t) |
632-
StepSummary::smallstep(mid, result, summary)
637+
StepSummary::smallstep(mid, result, summary) and
638+
not result instanceof SelfParameterNode
633639
or
634-
localFlowStep(mid, result, summary)
635-
) and
636-
not result instanceof SelfParameterNode and
637-
not hasAdjacentTypeCheckedReads(_, _, result.asExpr(), _)
640+
localFlowStep(mid, result, summary) and
641+
not hasAdjacentTypeCheckedReads(result)
642+
)
638643
}
639644

640645
pragma[nomagic]

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,7 @@ private module Cached {
366366

367367
cached
368368
predicate isLocalSourceNode(Node n) {
369-
n instanceof ParameterNode and
370-
not n instanceof SynthHashSplatParameterNode
369+
n instanceof ParameterNode
371370
or
372371
// Expressions that can't be reached from another entry definition or expression
373372
n instanceof ExprNode and
@@ -381,7 +380,7 @@ private module Cached {
381380
n instanceof SynthReturnNode
382381
or
383382
// Needed for stores in type tracking
384-
TypeTrackerSpecific::postUpdateStoreStep(_, n, _)
383+
TypeTrackerSpecific::storeStepIntoSourceNode(_, n, _)
385384
}
386385

387386
cached
@@ -1010,6 +1009,31 @@ private ContentSet getKeywordContent(string name) {
10101009
)
10111010
}
10121011

1012+
/**
1013+
* Subset of `storeStep` that should be shared with type-tracking.
1014+
*/
1015+
predicate storeStepCommon(Node node1, ContentSet c, Node node2) {
1016+
// Wrap all key-value arguments in a synthesized hash-splat argument node
1017+
exists(CfgNodes::ExprNodes::CallCfgNode call | node2 = TSynthHashSplatArgumentNode(call) |
1018+
// symbol key
1019+
exists(ArgumentPosition keywordPos, string name |
1020+
node1.asExpr().(Argument).isArgumentOf(call, keywordPos) and
1021+
keywordPos.isKeyword(name) and
1022+
c = getKeywordContent(name)
1023+
)
1024+
or
1025+
// non-symbol key
1026+
exists(CfgNodes::ExprNodes::PairCfgNode pair, CfgNodes::ExprCfgNode key, ConstantValue cv |
1027+
node1.asExpr() = pair.getValue() and
1028+
pair = call.getAnArgument() and
1029+
key = pair.getKey() and
1030+
cv = key.getConstantValue() and
1031+
not cv.isSymbol(_) and
1032+
c.isSingleton(TKnownElementContent(cv))
1033+
)
1034+
)
1035+
}
1036+
10131037
/**
10141038
* Holds if data can flow from `node1` to `node2` via an assignment to
10151039
* content `c`.
@@ -1040,25 +1064,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
10401064
or
10411065
FlowSummaryImpl::Private::Steps::summaryStoreStep(node1, c, node2)
10421066
or
1043-
// Wrap all key-value arguments in a synthesized hash-splat argument node
1044-
exists(CfgNodes::ExprNodes::CallCfgNode call | node2 = TSynthHashSplatArgumentNode(call) |
1045-
// symbol key
1046-
exists(ArgumentPosition keywordPos, string name |
1047-
node1.asExpr().(Argument).isArgumentOf(call, keywordPos) and
1048-
keywordPos.isKeyword(name) and
1049-
c = getKeywordContent(name)
1050-
)
1051-
or
1052-
// non-symbol key
1053-
exists(CfgNodes::ExprNodes::PairCfgNode pair, CfgNodes::ExprCfgNode key, ConstantValue cv |
1054-
node1.asExpr() = pair.getValue() and
1055-
pair = call.getAnArgument() and
1056-
key = pair.getKey() and
1057-
cv = key.getConstantValue() and
1058-
not cv.isSymbol(_) and
1059-
c.isSingleton(TKnownElementContent(cv))
1060-
)
1061-
)
1067+
storeStepCommon(node1, c, node2)
10621068
}
10631069

10641070
/**

ruby/ql/lib/codeql/ruby/frameworks/Core.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ private class SplatSummary extends SummarizedCallable {
7777
private class HashSplatSummary extends SummarizedCallable {
7878
HashSplatSummary() { this = "**(hash-splat)" }
7979

80-
override HashSplatExpr getACall() { any() }
80+
override HashSplatExpr getACallSimple() { any() }
8181

8282
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
8383
input = "Argument[self].WithElement[any]" and

0 commit comments

Comments
 (0)