Skip to content

Ruby: Context sensitive instance method resolution #10358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ruby/ql/lib/codeql/ruby/ast/Module.qll
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ class Module extends TModule {
/** Gets the super class of this module, if any. */
Module getSuperClass() { result = getSuperClass(this) }

/** Gets an immediate sub class of this module, if any. */
Module getASubClass() { this = getSuperClass(result) }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This predicate returns direct sub classes only, doesn't it? Let's document that and perhaps also make it clearer from its name.


/** Gets a `prepend`ed module. */
Module getAPrependedModule() { result = getAPrependedModule(this) }

Expand Down
302 changes: 205 additions & 97 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowDispatch.qll
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,20 @@ private predicate hasAdjacentTypeCheckedReads(
)
}

/**
 * Gets a source-code method that `call` may resolve to, that is, a target
 * computed by `getTarget` for the underlying call.
 *
 * `yield` calls have no result here.
 */
private DataFlowCallable viableSourceCallable(DataFlowCall call) {
result = TCfgScope(getTarget(call.asCall())) and
not call.asCall().getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall`
}

/**
 * Gets a summarized library method that `call` may resolve to, that is, a
 * `LibraryCallable` that has the expression of `call` among its calls
 * (`getACall()`).
 */
private DataFlowCallable viableLibraryCallable(DataFlowCall call) {
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
call.asCall().getExpr() = callable.getACall()
)
}

cached
private module Cached {
cached
Expand Down Expand Up @@ -366,13 +380,9 @@ private module Cached {
/** Gets a viable run-time target for the call `call`. */
cached
DataFlowCallable viableCallable(DataFlowCall call) {
result = TCfgScope(getTarget(call.asCall())) and
not call.asCall().getExpr() instanceof YieldCall // handled by `lambdaCreation`/`lambdaCall`
result = viableSourceCallable(call)
or
exists(LibraryCallable callable |
result = TLibraryCallable(callable) and
call.asCall().getExpr() = callable.getACall()
)
result = viableLibraryCallable(call)
}

cached
Expand Down Expand Up @@ -438,90 +448,103 @@ private DataFlow::LocalSourceNode trackModuleAccess(Module m) {
result = trackModuleAccess(m, TypeTracker::end())
}

pragma[nomagic]
private DataFlow::Node trackInstance(Module tp, boolean exact, TypeTracker t) {
t.start() and
(
result.asExpr().getExpr() instanceof NilLiteral and
tp = TResolved("NilClass") and
exact = true
or
result.asExpr().getExpr().(BooleanLiteral).isFalse() and
tp = TResolved("FalseClass") and
exact = true
or
result.asExpr().getExpr().(BooleanLiteral).isTrue() and
tp = TResolved("TrueClass") and
exact = true
or
result.asExpr().getExpr() instanceof IntegerLiteral and
tp = TResolved("Integer") and
exact = true
or
result.asExpr().getExpr() instanceof FloatLiteral and
tp = TResolved("Float") and
exact = true
or
result.asExpr().getExpr() instanceof RationalLiteral and
tp = TResolved("Rational") and
exact = true
or
result.asExpr().getExpr() instanceof ComplexLiteral and
tp = TResolved("Complex") and
exact = true
or
result.asExpr().getExpr() instanceof StringlikeLiteral and
tp = TResolved("String") and
exact = true
or
result.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode and
tp = TResolved("Array") and
exact = true
or
result.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode and
tp = TResolved("Hash") and
exact = true
or
result.asExpr().getExpr() instanceof MethodBase and
tp = TResolved("Symbol") and
exact = true
or
result.asParameter() instanceof BlockParameter and
tp = TResolved("Proc") and
exact = true
/** Holds if `n` is an instance of type `tp`. */
private predicate isInstance(DataFlow::Node n, Module tp, boolean exact) {
n.asExpr().getExpr() instanceof NilLiteral and
tp = TResolved("NilClass") and
exact = true
or
n.asExpr().getExpr().(BooleanLiteral).isFalse() and
tp = TResolved("FalseClass") and
exact = true
or
n.asExpr().getExpr().(BooleanLiteral).isTrue() and
tp = TResolved("TrueClass") and
exact = true
or
n.asExpr().getExpr() instanceof IntegerLiteral and
tp = TResolved("Integer") and
exact = true
or
n.asExpr().getExpr() instanceof FloatLiteral and
tp = TResolved("Float") and
exact = true
or
n.asExpr().getExpr() instanceof RationalLiteral and
tp = TResolved("Rational") and
exact = true
or
n.asExpr().getExpr() instanceof ComplexLiteral and
tp = TResolved("Complex") and
exact = true
or
n.asExpr().getExpr() instanceof StringlikeLiteral and
tp = TResolved("String") and
exact = true
or
n.asExpr() instanceof CfgNodes::ExprNodes::ArrayLiteralCfgNode and
tp = TResolved("Array") and
exact = true
or
n.asExpr() instanceof CfgNodes::ExprNodes::HashLiteralCfgNode and
tp = TResolved("Hash") and
exact = true
or
n.asExpr().getExpr() instanceof MethodBase and
tp = TResolved("Symbol") and
exact = true
or
n.asParameter() instanceof BlockParameter and
tp = TResolved("Proc") and
exact = true
or
n.asExpr().getExpr() instanceof Lambda and
tp = TResolved("Proc") and
exact = true
or
exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode |
flowsToMethodCall(call, sourceNode, "new") and
exact = true and
n.asExpr() = call
|
// `C.new`
sourceNode = trackModuleAccess(tp)
or
result.asExpr().getExpr() instanceof Lambda and
tp = TResolved("Proc") and
exact = true
// `self.new` inside a module
selfInModule(sourceNode.(SsaSelfDefinitionNode).getVariable(), tp)
or
exists(CfgNodes::ExprNodes::CallCfgNode call, DataFlow::LocalSourceNode sourceNode |
flowsToMethodCall(call, sourceNode, "new") and
exact = true and
result.asExpr() = call
|
// `C.new`
sourceNode = trackModuleAccess(tp)
or
// `self.new` inside a module
selfInModule(sourceNode.(SsaSelfDefinitionNode).getVariable(), tp)
// `self.new` inside a singleton method
selfInMethod(sourceNode.(SsaSelfDefinitionNode).getVariable(), any(SingletonMethod sm), tp)
)
or
// `self` reference in method or top-level (but not in module or singleton method,
// where instance methods cannot be called; only singleton methods)
n =
any(SsaSelfDefinitionNode self |
exists(MethodBase m |
selfInMethod(self.getVariable(), m, tp) and
not m instanceof SingletonMethod and
if m.getEnclosingModule() instanceof Toplevel then exact = true else exact = false
)
or
// `self.new` inside a singleton method
selfInMethod(sourceNode.(SsaSelfDefinitionNode).getVariable(), any(SingletonMethod sm), tp)
selfInToplevel(self.getVariable(), tp) and
exact = true
)
or
// `self` reference in method or top-level (but not in module or singleton method,
// where instance methods cannot be called; only singleton methods)
result =
any(SsaSelfDefinitionNode self |
exists(MethodBase m |
selfInMethod(self.getVariable(), m, tp) and
not m instanceof SingletonMethod and
if m.getEnclosingModule() instanceof Toplevel then exact = true else exact = false
)
or
selfInToplevel(self.getVariable(), tp) and
exact = true
)
or
// `in C => c then c.foo`
asModulePattern(n, tp) and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps also add C === x tests. If I'm not mistaken, both types of case expressions are implemented using that operator.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see you just moved these predicates. Perhaps we should postpone === for a future PR. We might want to desugar case statements into if at some point too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we can do that in another PR. I guess it could be added to hasAdjacentTypeCheckedReads.

exact = false
or
// `case object when C then object.foo`
hasAdjacentTypeCheckedReads(_, _, n.asExpr(), tp) and
exact = false
}

pragma[nomagic]
private DataFlow::Node trackInstance(Module tp, boolean exact, TypeTracker t) {
t.start() and
(
isInstance(result, tp, exact)
or
exists(Module m |
(if m.isClass() then tp = TResolved("Class") else tp = TResolved("Module")) and
Expand All @@ -536,14 +559,6 @@ private DataFlow::Node trackInstance(Module tp, boolean exact, TypeTracker t) {
// needed for e.g. `self.puts`
selfInMethod(result.(SsaSelfDefinitionNode).getVariable(), any(SingletonMethod sm), m)
)
or
// `in C => c then c.foo`
asModulePattern(result, tp) and
exact = false
or
// `case object when C then object.foo`
hasAdjacentTypeCheckedReads(_, _, result.asExpr(), tp) and
exact = false
)
or
exists(TypeTracker t2, StepSummary summary |
Expand Down Expand Up @@ -778,19 +793,112 @@ private DataFlow::Node trackSingletonMethodOnInstance(MethodBase method, string
result = trackSingletonMethodOnInstance(method, name, TypeTracker::end())
}

/**
 * Same as `isInstance`, but includes local must-flow through SSA definitions.
 *
 * Besides the nodes covered by `isInstance`, this holds (recursively, with the
 * same `tp` and `exact`) for
 * - a read of an SSA definition whose node satisfies this predicate, and
 * - the node of an SSA write definition that assigns an expression whose node
 *   satisfies this predicate.
 */
private predicate isInstanceLocalMustFlow(DataFlow::Node n, Module tp, boolean exact) {
// base case
isInstance(n, tp, exact)
or
// recursive case: step from `mid` to `n`
exists(DataFlow::Node mid | isInstanceLocalMustFlow(mid, tp, exact) |
// `n` reads the SSA definition `mid`
n.asExpr() = mid.(SsaDefinitionNode).getDefinition().getARead()
or
// `n` is an SSA write definition that is assigned the expression `mid`
n.(SsaDefinitionNode).getDefinition().(Ssa::WriteDefinition).assigns(mid.asExpr())
)
}

/**
 * Holds if `ctx` targets `encl`, which is the enclosing callable of `call`, and
 * the receiver of `call` is a parameter access, where `arg` is the corresponding
 * argument of `ctx`.
 *
 * `name` is the name of the method being called by `call`.
 */
pragma[nomagic]
private predicate mayBenefitFromCallContext0(
CfgNodes::ExprNodes::CallCfgNode ctx, ArgumentNode arg, CfgNodes::ExprNodes::CallCfgNode call,
Callable encl, string name
) {
exists(
ParameterNodeImpl p, SsaDefinitionNode ssaNode, ParameterPosition ppos, ArgumentPosition apos
|
// the receiver of `call` references `p`
ssaNode = trackInstance(_, _) and
LocalFlow::localFlowSsaParamInput(p, ssaNode) and
flowsToMethodCall(pragma[only_bind_into](call), pragma[only_bind_into](ssaNode),
pragma[only_bind_into](name)) and
// `p` is a parameter of `encl`,
encl = call.getScope() and
p.isParameterOf(TCfgScope(encl), ppos) and
// `ctx` targets `encl`
getTarget(ctx) = encl and
// `arg` is the argument for `p` in the call `ctx`
arg.sourceArgumentOf(ctx, apos) and
// the argument position `apos` must match the parameter position `ppos`
parameterMatch(ppos, apos)
)
}

/**
 * Holds if `ctx` targets `encl`, which is the enclosing callable of `call`, and
 * the receiver of `call` is a parameter access, where the corresponding argument
 * of `ctx` has type `tp`.
 *
 * `name` is the name of the method being called by `call`. `exact` indicates
 * whether the argument is known to have exactly the type `tp` (`true`), or
 * whether it may also be an instance of a sub class of `tp` (`false`).
 */
pragma[nomagic]
private predicate mayBenefitFromCallContext1(
CfgNodes::ExprNodes::CallCfgNode ctx, CfgNodes::ExprNodes::CallCfgNode call, Callable encl,
Module tp, boolean exact, string name
) {
exists(ArgumentNode arg |
mayBenefitFromCallContext0(ctx, arg, call, encl, name) and
// `arg` has a relevant instance type
isInstanceLocalMustFlow(arg, pragma[only_bind_out](tp), exact) and
// only types on which a method named `name` can be looked up are relevant
exists(lookupMethod(tp, pragma[only_bind_into](name)))
)
}

/**
* Holds if the set of viable implementations that can be called by `call`
* might be improved by knowing the call context. This is the case if the
* qualifier accesses a parameter of the enclosing callable `c` (including
* receiver accesses a parameter of the enclosing callable `c` (including
* the implicit `self` parameter).
*/
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) { none() }
predicate mayBenefitFromCallContext(DataFlowCall call, DataFlowCallable c) {
mayBenefitFromCallContext1(_, call.asCall(), c.asCallable(), _, _, _)
}

/**
* Gets a viable dispatch target of `call` in the context `ctx`. This is
* restricted to those `call`s for which a context might make a difference.
*/
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { none() }
pragma[nomagic]
DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) {
// `ctx` can provide a potentially better type bound
exists(CfgNodes::ExprNodes::CallCfgNode call0, Callable res |
call0 = call.asCall() and
res = result.asCallable() and
res = getTarget(call0) and // make sure to not include e.g. private methods
exists(Module tp, Module m, boolean exact, string name |
res = lookupMethod(tp, name) and
mayBenefitFromCallContext1(ctx.asCall(), pragma[only_bind_into](call0), _,
pragma[only_bind_into](m), exact, pragma[only_bind_into](name))
|
tp = m
or
exact = false and
tp.getSuperClass+() = m
)
)
or
// `ctx` cannot provide a type bound
exists(ArgumentNode arg |
mayBenefitFromCallContext0(ctx.asCall(), arg, call.asCall(), _, _) and
not isInstanceLocalMustFlow(arg, _, _) and
result = viableSourceCallable(call)
)
or
// library calls should always be able to resolve
mayBenefitFromCallContext0(ctx.asCall(), _, call.asCall(), _, _) and
result = viableLibraryCallable(call)
}

predicate exprNodeReturnedFrom = exprNodeReturnedFromCached/2;

Expand Down
Loading