Skip to content

Commit 6bd9d82

Browse files
authored
Merge pull request #8061 from RasmusWL/orm
Python: Add data-flow through Django ORM models
2 parents 1d45996 + 758a81c commit 6bd9d82

36 files changed

+1723
-13
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: majorAnalysis
3+
---
4+
* Added data-flow for Django ORM models that are saved in a database (no `models.ForeignKey` support).

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@ private import DataFlowPublic
33
import semmle.python.SpecialMethods
44
private import semmle.python.essa.SsaCompute
55
private import semmle.python.dataflow.new.internal.ImportStar
6+
// Since we allow extra data-flow steps from modeled frameworks, we import these
7+
// up-front, to ensure these are included. This provides a more seamless experience from
8+
// a user point of view, since they don't need to know they need to import a specific
9+
// set of .qll files to get the same data-flow steps as they are used to seeing. This
10+
// also ensures that we don't end up re-evaluating data-flow because it has different
11+
// global steps in some configurations.
12+
//
13+
// This matches behavior in C#.
14+
private import semmle.python.Frameworks
615

716
/** Gets the callable in which this node occurs. */
817
DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() }
@@ -943,6 +952,24 @@ string ppReprType(DataFlowType t) { none() }
943952
* taken into account.
944953
*/
945954
predicate jumpStep(Node nodeFrom, Node nodeTo) {
955+
jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
956+
or
957+
jumpStepNotSharedWithTypeTracker(nodeFrom, nodeTo)
958+
}
959+
960+
/**
961+
* Set of jumpSteps that are shared with type-tracker implementation.
962+
*
963+
* For ORM modeling we want to add jumpsteps to global dataflow, but since these are
964+
* based on type-trackers, it's important that these new ORM jumpsteps are not used in
965+
* the type-trackers as well, as that would make evaluation of type-tracking recursive
966+
* with the new jumpsteps.
967+
*
968+
* Holds if `nodeFrom` can flow to `nodeTo`, by jumping from one callable to
969+
* another. Additional steps specified by the configuration are *not*
970+
* taken into account.
971+
*/
972+
predicate jumpStepSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
946973
runtimeJumpStep(nodeFrom, nodeTo)
947974
or
948975
// Read of module attribute:
@@ -956,6 +983,22 @@ predicate jumpStep(Node nodeFrom, Node nodeTo) {
956983
defaultValueFlowStep(nodeFrom, nodeTo)
957984
}
958985

986+
/**
987+
* Set of jumpSteps that are NOT shared with type-tracker implementation.
988+
*
989+
* For ORM modeling we want to add jumpsteps to global dataflow, but since these are
990+
* based on type-trackers, it's important that these new ORM jumpsteps are not used in
991+
* the type-trackers as well, as that would make evaluation of type-tracking recursive
992+
* with the new jumpsteps.
993+
*
994+
* Holds if `nodeFrom` can flow to `nodeTo`, by jumping from one callable to
995+
* another. Additional steps specified by the configuration are *not*
996+
* taken into account.
997+
*/
998+
predicate jumpStepNotSharedWithTypeTracker(Node nodeFrom, Node nodeTo) {
999+
any(Orm::AdditionalOrmSteps es).jumpStep(nodeFrom, nodeTo)
1000+
}
1001+
9591002
/**
9601003
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
9611004
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
@@ -999,6 +1042,51 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
9991042
kwOverflowStoreStep(nodeFrom, c, nodeTo)
10001043
or
10011044
matchStoreStep(nodeFrom, c, nodeTo)
1045+
or
1046+
any(Orm::AdditionalOrmSteps es).storeStep(nodeFrom, c, nodeTo)
1047+
}
1048+
1049+
/**
1050+
* INTERNAL: Do not use.
1051+
*
1052+
* Provides classes for modeling data-flow through ORM models saved in a DB.
1053+
*/
1054+
module Orm {
1055+
/**
1056+
* INTERNAL: Do not use.
1057+
*
1058+
* A unit class for adding additional data-flow steps for ORM models.
1059+
*/
1060+
class AdditionalOrmSteps extends Unit {
1061+
/**
1062+
* Holds if data can flow from `nodeFrom` to `nodeTo` via an assignment to
1063+
* content `c`.
1064+
*/
1065+
abstract predicate storeStep(Node nodeFrom, Content c, Node nodeTo);
1066+
1067+
/**
1068+
* Holds if `nodeFrom` can flow to `nodeTo`, by jumping from one callable to
1069+
* another. Additional steps specified by the configuration are *not*
1070+
* taken into account.
1071+
*/
1072+
abstract predicate jumpStep(Node nodeFrom, Node nodeTo);
1073+
}
1074+
1075+
/** A synthetic node representing the data for an ORM model saved in a DB. */
1076+
class SyntheticOrmModelNode extends Node, TSyntheticOrmModelNode {
1077+
Class cls;
1078+
1079+
SyntheticOrmModelNode() { this = TSyntheticOrmModelNode(cls) }
1080+
1081+
override string toString() { result = "[orm-model] " + cls.toString() }
1082+
1083+
override Scope getScope() { result = cls.getEnclosingScope() }
1084+
1085+
override Location getLocation() { result = cls.getLocation() }
1086+
1087+
/** Gets the class that defines this ORM model. */
1088+
Class getClass() { result = cls }
1089+
}
10021090
}
10031091

10041092
/** Data flows from an element of a list to the list. */

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,20 @@ newtype TNode =
8787
/**
8888
* A synthetic node representing element content in a star pattern.
8989
*/
90-
TStarPatternElementNode(MatchStarPattern target)
90+
TStarPatternElementNode(MatchStarPattern target) or
91+
/**
92+
* INTERNAL: Do not use.
93+
*
94+
* A synthetic node representing the data for an ORM model saved in a DB.
95+
*/
96+
// TODO: Limiting the classes here to the ones that are actually ORM models was
97+
// non-trivial, since that logic is based on API::Node results, and trying to do this
98+
// causes non-monotonic recursion, and makes the API graph evaluation recursive with
99+
// data-flow, which might do bad things for performance.
100+
//
101+
// So for now we live with having these synthetic ORM nodes for _all_ classes, which
102+
// is a bit wasteful, but we don't think it will hurt too much.
103+
TSyntheticOrmModelNode(Class cls)
91104

92105
/** Helper for `Node::getEnclosingCallable`. */
93106
private DataFlowCallable getCallableScope(Scope s) {

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class TypeTrackingNode = DataFlowPublic::TypeTrackingNode;
1212

1313
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStep/2;
1414

15-
predicate jumpStep = DataFlowPrivate::jumpStep/2;
15+
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;
1616

1717
/** Holds if there is a level step from `pred` to `succ`. */
1818
predicate levelStep(Node pred, Node succ) { none() }

0 commit comments

Comments
 (0)