Skip to content

Commit 6e1914a

Browse files
authored
Merge pull request #10375 from asgerf/rb/summarize-loads-v2
Ruby: type-tracking and API edges through simple library callables
2 parents ef8ec08 + ed36f19 commit 6e1914a

23 files changed

+900
-240
lines changed

python/ql/lib/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55

66
private import python
77
private import internal.TypeTracker as Internal
8+
private import internal.TypeTrackerSpecific as InternalSpecific
89

910
/** A string that may appear as the name of an attribute or access path. */
10-
class AttributeName = Internal::ContentName;
11+
class AttributeName = InternalSpecific::TypeTrackerContent;
1112

1213
/** An attribute name, or the empty string (representing no attribute). */
13-
class OptionalAttributeName = Internal::OptionalContentName;
14+
class OptionalAttributeName = InternalSpecific::OptionalTypeTrackerContent;
1415

1516
/**
1617
* The summary of the steps needed to track a value to a given dataflow node.

python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll

Lines changed: 96 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,6 @@
22

33
private import TypeTrackerSpecific
44

5-
/**
6-
* A string that may appear as the name of a piece of content. This will usually include things like:
7-
* - Attribute names (in Python)
8-
* - Property names (in JavaScript)
9-
*
10-
* In general, this can also be used to model things like stores to specific list indices. To ensure
11-
* correctness, it is important that
12-
*
13-
* - different types of content do not have overlapping names, and
14-
* - the empty string `""` is not a valid piece of content, as it is used to indicate the absence of
15-
* content instead.
16-
*/
17-
class ContentName extends string {
18-
ContentName() { this = getPossibleContentName() }
19-
}
20-
21-
/** A content name, or the empty string (representing no content). */
22-
class OptionalContentName extends string {
23-
OptionalContentName() { this instanceof ContentName or this = "" }
24-
}
25-
265
cached
276
private module Cached {
287
/**
@@ -33,48 +12,78 @@ private module Cached {
3312
LevelStep() or
3413
CallStep() or
3514
ReturnStep() or
36-
StoreStep(ContentName content) or
37-
LoadStep(ContentName content) or
15+
StoreStep(TypeTrackerContent content) { basicStoreStep(_, _, content) } or
16+
LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
3817
JumpStep()
3918

19+
pragma[nomagic]
20+
private TypeTracker noContentTypeTracker(boolean hasCall) {
21+
result = MkTypeTracker(hasCall, noContent())
22+
}
23+
4024
/** Gets the summary resulting from appending `step` to type-tracking summary `tt`. */
4125
cached
4226
TypeTracker append(TypeTracker tt, StepSummary step) {
43-
exists(Boolean hasCall, OptionalContentName content | tt = MkTypeTracker(hasCall, content) |
27+
exists(Boolean hasCall, OptionalTypeTrackerContent currentContents |
28+
tt = MkTypeTracker(hasCall, currentContents)
29+
|
4430
step = LevelStep() and result = tt
4531
or
46-
step = CallStep() and result = MkTypeTracker(true, content)
32+
step = CallStep() and result = MkTypeTracker(true, currentContents)
4733
or
4834
step = ReturnStep() and hasCall = false and result = tt
4935
or
50-
step = LoadStep(content) and result = MkTypeTracker(hasCall, "")
51-
or
52-
exists(string p | step = StoreStep(p) and content = "" and result = MkTypeTracker(hasCall, p))
53-
or
5436
step = JumpStep() and
55-
result = MkTypeTracker(false, content)
37+
result = MkTypeTracker(false, currentContents)
38+
)
39+
or
40+
exists(TypeTrackerContent storeContents, boolean hasCall |
41+
exists(TypeTrackerContent loadContents |
42+
step = LoadStep(pragma[only_bind_into](loadContents)) and
43+
tt = MkTypeTracker(hasCall, storeContents) and
44+
compatibleContents(storeContents, loadContents) and
45+
result = noContentTypeTracker(hasCall)
46+
)
47+
or
48+
step = StoreStep(pragma[only_bind_into](storeContents)) and
49+
tt = noContentTypeTracker(hasCall) and
50+
result = MkTypeTracker(hasCall, storeContents)
5651
)
5752
}
5853

54+
pragma[nomagic]
55+
private TypeBackTracker noContentTypeBackTracker(boolean hasReturn) {
56+
result = MkTypeBackTracker(hasReturn, noContent())
57+
}
58+
5959
/** Gets the summary resulting from prepending `step` to type back-tracking summary `tbt`. */
6060
cached
6161
TypeBackTracker prepend(TypeBackTracker tbt, StepSummary step) {
62-
exists(Boolean hasReturn, string content | tbt = MkTypeBackTracker(hasReturn, content) |
62+
exists(Boolean hasReturn, OptionalTypeTrackerContent content |
63+
tbt = MkTypeBackTracker(hasReturn, content)
64+
|
6365
step = LevelStep() and result = tbt
6466
or
6567
step = CallStep() and hasReturn = false and result = tbt
6668
or
6769
step = ReturnStep() and result = MkTypeBackTracker(true, content)
6870
or
69-
exists(string p |
70-
step = LoadStep(p) and content = "" and result = MkTypeBackTracker(hasReturn, p)
71-
)
72-
or
73-
step = StoreStep(content) and result = MkTypeBackTracker(hasReturn, "")
74-
or
7571
step = JumpStep() and
7672
result = MkTypeBackTracker(false, content)
7773
)
74+
or
75+
exists(TypeTrackerContent loadContents, boolean hasReturn |
76+
exists(TypeTrackerContent storeContents |
77+
step = StoreStep(pragma[only_bind_into](storeContents)) and
78+
tbt = MkTypeBackTracker(hasReturn, loadContents) and
79+
compatibleContents(storeContents, loadContents) and
80+
result = noContentTypeBackTracker(hasReturn)
81+
)
82+
or
83+
step = LoadStep(pragma[only_bind_into](loadContents)) and
84+
tbt = noContentTypeBackTracker(hasReturn) and
85+
result = MkTypeBackTracker(hasReturn, loadContents)
86+
)
7887
}
7988

8089
/**
@@ -114,9 +123,9 @@ class StepSummary extends TStepSummary {
114123
or
115124
this instanceof ReturnStep and result = "return"
116125
or
117-
exists(string content | this = StoreStep(content) | result = "store " + content)
126+
exists(TypeTrackerContent content | this = StoreStep(content) | result = "store " + content)
118127
or
119-
exists(string content | this = LoadStep(content) | result = "load " + content)
128+
exists(TypeTrackerContent content | this = LoadStep(content) | result = "load " + content)
120129
or
121130
this instanceof JumpStep and result = "jump"
122131
}
@@ -130,7 +139,7 @@ private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSu
130139
levelStep(nodeFrom, nodeTo) and
131140
summary = LevelStep()
132141
or
133-
exists(string content |
142+
exists(TypeTrackerContent content |
134143
StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and
135144
summary = StoreStep(content)
136145
or
@@ -180,7 +189,7 @@ module StepSummary {
180189
}
181190

182191
/**
183-
* Holds if `nodeFrom` is being written to the `content` content of the object in `nodeTo`.
192+
* Holds if `nodeFrom` is being written to the `content` of the object in `nodeTo`.
184193
*
185194
* Note that `nodeTo` will always be a local source node that flows to the place where the content
186195
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
@@ -204,12 +213,23 @@ module StepSummary {
204213
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
205214
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
206215
*/
207-
predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, string content) {
216+
predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content) {
208217
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
209218
}
210219
}
211220

212-
private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentName content)
221+
private newtype TTypeTracker =
222+
MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
223+
content = noContent()
224+
or
225+
// Restrict `content` to those that might eventually match a load.
226+
// We can't rely on `basicStoreStep` since `startInContent` might be used with
227+
// a content that has no corresponding store.
228+
exists(TypeTrackerContent loadContents |
229+
basicLoadStep(_, _, loadContents) and
230+
compatibleContents(content, loadContents)
231+
)
232+
}
213233

214234
/**
215235
* A summary of the steps needed to track a value to a given dataflow node.
@@ -240,7 +260,7 @@ private newtype TTypeTracker = MkTypeTracker(Boolean hasCall, OptionalContentNam
240260
*/
241261
class TypeTracker extends TTypeTracker {
242262
Boolean hasCall;
243-
OptionalContentName content;
263+
OptionalTypeTrackerContent content;
244264

245265
TypeTracker() { this = MkTypeTracker(hasCall, content) }
246266

@@ -251,32 +271,38 @@ class TypeTracker extends TTypeTracker {
251271
string toString() {
252272
exists(string withCall, string withContent |
253273
(if hasCall = true then withCall = "with" else withCall = "without") and
254-
(if content != "" then withContent = " with content " + content else withContent = "") and
274+
(
275+
if content != noContent()
276+
then withContent = " with content " + content
277+
else withContent = ""
278+
) and
255279
result = "type tracker " + withCall + " call steps" + withContent
256280
)
257281
}
258282

259283
/**
260284
* Holds if this is the starting point of type tracking.
261285
*/
262-
predicate start() { hasCall = false and content = "" }
286+
predicate start() { hasCall = false and content = noContent() }
263287

264288
/**
265289
* Holds if this is the starting point of type tracking, and the value starts in the content named `contentName`.
266290
* The type tracking only ends after the content has been loaded.
267291
*/
268-
predicate startInContent(ContentName contentName) { hasCall = false and content = contentName }
292+
predicate startInContent(TypeTrackerContent contentName) {
293+
hasCall = false and content = contentName
294+
}
269295

270296
/**
271297
* Holds if this is the starting point of type tracking
272298
* when tracking a parameter into a call, but not out of it.
273299
*/
274-
predicate call() { hasCall = true and content = "" }
300+
predicate call() { hasCall = true and content = noContent() }
275301

276302
/**
277303
* Holds if this is the end point of type tracking.
278304
*/
279-
predicate end() { content = "" }
305+
predicate end() { content = noContent() }
280306

281307
/**
282308
* INTERNAL. DO NOT USE.
@@ -290,15 +316,15 @@ class TypeTracker extends TTypeTracker {
290316
*
291317
* Gets the content associated with this type tracker.
292318
*/
293-
string getContent() { result = content }
319+
OptionalTypeTrackerContent getContent() { result = content }
294320

295321
/**
296322
* Gets a type tracker that starts where this one has left off to allow continued
297323
* tracking.
298324
*
299325
* This predicate is only defined if the type is not associated to a piece of content.
300326
*/
301-
TypeTracker continue() { content = "" and result = this }
327+
TypeTracker continue() { content = noContent() and result = this }
302328

303329
/**
304330
* Gets the summary that corresponds to having taken a forwards
@@ -356,7 +382,16 @@ module TypeTracker {
356382
TypeTracker end() { result.end() }
357383
}
358384

359-
private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, OptionalContentName content)
385+
private newtype TTypeBackTracker =
386+
MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
387+
content = noContent()
388+
or
389+
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
390+
exists(TypeTrackerContent storeContent |
391+
basicStoreStep(_, _, storeContent) and
392+
compatibleContents(storeContent, content)
393+
)
394+
}
360395

361396
/**
362397
* A summary of the steps needed to back-track a use of a value to a given dataflow node.
@@ -390,7 +425,7 @@ private newtype TTypeBackTracker = MkTypeBackTracker(Boolean hasReturn, Optional
390425
*/
391426
class TypeBackTracker extends TTypeBackTracker {
392427
Boolean hasReturn;
393-
string content;
428+
OptionalTypeTrackerContent content;
394429

395430
TypeBackTracker() { this = MkTypeBackTracker(hasReturn, content) }
396431

@@ -401,20 +436,24 @@ class TypeBackTracker extends TTypeBackTracker {
401436
string toString() {
402437
exists(string withReturn, string withContent |
403438
(if hasReturn = true then withReturn = "with" else withReturn = "without") and
404-
(if content != "" then withContent = " with content " + content else withContent = "") and
439+
(
440+
if content != noContent()
441+
then withContent = " with content " + content
442+
else withContent = ""
443+
) and
405444
result = "type back-tracker " + withReturn + " return steps" + withContent
406445
)
407446
}
408447

409448
/**
410449
* Holds if this is the starting point of type back-tracking.
411450
*/
412-
predicate start() { hasReturn = false and content = "" }
451+
predicate start() { hasReturn = false and content = noContent() }
413452

414453
/**
415454
* Holds if this is the end point of type back-tracking.
416455
*/
417-
predicate end() { content = "" }
456+
predicate end() { content = noContent() }
418457

419458
/**
420459
* INTERNAL. DO NOT USE.
@@ -429,7 +468,7 @@ class TypeBackTracker extends TTypeBackTracker {
429468
*
430469
* This predicate is only defined if the type has not been tracked into a piece of content.
431470
*/
432-
TypeBackTracker continue() { content = "" and result = this }
471+
TypeBackTracker continue() { content = noContent() and result = this }
433472

434473
/**
435474
* Gets the summary that corresponds to having taken a backwards

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,28 @@ class Node = DataFlowPublic::Node;
1111

1212
class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
1313

14+
/** A content name for use by type trackers, or the empty string. */
15+
class OptionalTypeTrackerContent extends string {
16+
OptionalTypeTrackerContent() {
17+
this = ""
18+
or
19+
this = getPossibleContentName()
20+
}
21+
}
22+
23+
/** A content name for use by type trackers. */
24+
class TypeTrackerContent extends OptionalTypeTrackerContent {
25+
TypeTrackerContent() { this != "" }
26+
}
27+
28+
/** Gets the content string representing the absence of content (the empty string). */
29+
OptionalTypeTrackerContent noContent() { result = "" }
30+
31+
pragma[inline]
32+
predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent loadContent) {
33+
storeContent = loadContent
34+
}
35+
1436
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
1537

1638
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;

0 commit comments

Comments
 (0)