Skip to content

Java: Support SCCs in TypeFlow. #10240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 6, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
244 changes: 200 additions & 44 deletions java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ private class TypeFlowNode extends TTypeFlowNode {
}
}

private int getNodeKind(TypeFlowNode n) {
result = 1 and n instanceof TField
or
result = 2 and n instanceof TSsa
or
result = 3 and n instanceof TExpr
or
result = 4 and n instanceof TMethod
}

/** Gets `t` if it is a `RefType` or the boxed type if `t` is a primitive type. */
private RefType boxIfNeeded(Type t) {
t.(PrimitiveType).getBoxedType() = result or
Expand Down Expand Up @@ -146,27 +156,181 @@ private predicate joinStep(TypeFlowNode n1, TypeFlowNode n2) {
joinStep0(n1, n2) and not isNull(n1)
}

private predicate joinStepRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
n1 =
rank[r](TypeFlowNode n |
joinStep(n, n2)
|
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn()
private predicate anyStep(TypeFlowNode n1, TypeFlowNode n2) { joinStep(n1, n2) or step(n1, n2) }

private import SccReduction

/**
* SCC reduction.
*
* This ought to be as easy as `equivalenceRelation(sccEdge/2)(n, scc)`, but
* this HOP is not currently supported for newtypes.
*
* A straightforward implementation would be:
* ```ql
* predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
* scc =
* max(TypeFlowNode n2 |
* sccEdge+(n, n2)
* |
* n2
* order by
* n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
* )
* }
*
* ```
* but this is quadratic in the size of the SCCs.
*
* Instead we find local maxima by following SCC edges and determine the SCC
* representatives from those.
* (This is still worst-case quadratic in the size of the SCCs, but generally
* performs better.)
*/
private module SccReduction {
private predicate sccEdge(TypeFlowNode n1, TypeFlowNode n2) {
anyStep(n1, n2) and anyStep+(n2, n1)
}

private predicate sccEdgeWithMax(TypeFlowNode n1, TypeFlowNode n2, TypeFlowNode m) {
sccEdge(n1, n2) and
m =
max(TypeFlowNode n |
n = [n1, n2]
|
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn(), getNodeKind(n)
)
}

private predicate hasLargerNeighbor(TypeFlowNode n) {
exists(TypeFlowNode n2 |
sccEdgeWithMax(n, n2, n2) and
not sccEdgeWithMax(n, n2, n)
or
sccEdgeWithMax(n2, n, n2) and
not sccEdgeWithMax(n2, n, n)
)
}

private predicate localMax(TypeFlowNode m) {
sccEdgeWithMax(_, _, m) and
not hasLargerNeighbor(m)
}

private predicate sccReprFromLocalMax(TypeFlowNode scc) {
exists(TypeFlowNode m |
localMax(m) and
scc =
max(TypeFlowNode n2 |
sccEdge+(m, n2) and localMax(n2)
|
n2
order by
n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
)
)
}

/** Holds if `n` is part of an SCC of size 2 or more represented by `scc`. */
predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
sccEdge+(n, scc) and sccReprFromLocalMax(scc)
}

predicate sccJoinStep(TypeFlowNode n, TypeFlowNode scc) {
exists(TypeFlowNode mid |
joinStep(n, mid) and
sccRepr(mid, scc) and
not sccRepr(n, scc)
)
}
}

private predicate joinStepRank2(int r2, int r1, TypeFlowNode n) {
r1 = rank[r2](int r | joinStepRank1(r, _, n) | r)
private signature predicate edgeSig(TypeFlowNode n1, TypeFlowNode n2);

private signature module RankedEdge {
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2);

int lastRank(TypeFlowNode n);
}

private predicate joinStepRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
exists(int r1 |
joinStepRank1(r1, n1, n2) and
joinStepRank2(r, r1, n2)
)
private module RankEdge<edgeSig/2 edge> implements RankedEdge {
/**
* Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. The used
* ordering is not necessarily total, so the ranking may have gaps.
*/
private predicate edgeRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
n1 =
rank[r](TypeFlowNode n |
edge(n, n2)
|
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn()
)
}

/**
* Holds if `r2` is a ranking of the ranks from `edgeRank1`. This removes the
* gaps from the ranking.
*/
private predicate edgeRank2(int r2, int r1, TypeFlowNode n) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, I think this predicate deserves a comment.

r1 = rank[r2](int r | edgeRank1(r, _, n) | r)
}

/** Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. */
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
exists(int r1 |
edgeRank1(r1, n1, n2) and
edgeRank2(r, r1, n2)
)
}

int lastRank(TypeFlowNode n) { result = max(int r | edgeRank(r, _, n)) }
}

private signature module TypePropagation {
predicate candType(TypeFlowNode n, RefType t);

bindingset[t]
predicate supportsType(TypeFlowNode n, RefType t);
}

/** Implements recursion through `forall` by way of edge ranking. */
private module ForAll<RankedEdge Edge, TypePropagation T> {
/**
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
* thus is a candidate bound for `n`.
*/
pragma[nomagic]
private predicate candJoinType(TypeFlowNode n, RefType t) {
exists(TypeFlowNode mid |
T::candType(mid, t) and
Edge::edgeRank(_, mid, n)
)
}

/**
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
* through the edges into `n` ranked from `1` to `r`.
*/
private predicate flowJoin(int r, TypeFlowNode n, RefType t) {
(
r = 1 and candJoinType(n, t)
or
flowJoin(r - 1, n, t) and Edge::edgeRank(r, _, n)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the edgeRank conjunct be removed if the forall below becomes a forex?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Semantically, yes. But the codegen will be worse. A forex(vars | range | term) currently desugars to exists(vars | range | term) and forall(vars | range | term) so that would give an additional recursive call (which we might or might not be lucky enough to have the optimiser deduplicate at the RA level after join-ordering, since the delta of a forall is equal to the delta of a forex).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

) and
forall(TypeFlowNode mid | Edge::edgeRank(r, mid, n) | T::supportsType(mid, t))
}

/**
* Holds if `t` is a candidate bound for `n` that is also valid for data
* coming through all the incoming edges, and therefore is a valid bound for
* `n`.
*/
predicate flowJoin(TypeFlowNode n, RefType t) { flowJoin(Edge::lastRank(n), n, t) }
}

private int lastRank(TypeFlowNode n) { result = max(int r | joinStepRank(r, _, n)) }
module RankedJoinStep = RankEdge<joinStep/2>;

module RankedSccJoinStep = RankEdge<sccJoinStep/2>;

private predicate exactTypeBase(TypeFlowNode n, RefType t) {
exists(ClassInstanceExpr e |
Expand All @@ -177,15 +341,10 @@ private predicate exactTypeBase(TypeFlowNode n, RefType t) {
)
}

private predicate exactTypeRank(int r, TypeFlowNode n, RefType t) {
forall(TypeFlowNode mid | joinStepRank(r, mid, n) | exactType(mid, t)) and
joinStepRank(r, _, n)
}
private module ExactTypePropagation implements TypePropagation {
predicate candType = exactType/2;

private predicate exactTypeJoin(int r, TypeFlowNode n, RefType t) {
exactTypeRank(1, n, t) and r = 1
or
exactTypeJoin(r - 1, n, t) and exactTypeRank(r, n, t)
predicate supportsType = exactType/2;
}

/**
Expand All @@ -199,7 +358,14 @@ private predicate exactType(TypeFlowNode n, RefType t) {
or
// The following is an optimized version of
// `forex(TypeFlowNode mid | joinStep(mid, n) | exactType(mid, t))`
exactTypeJoin(lastRank(n), n, t)
ForAll<RankedJoinStep, ExactTypePropagation>::flowJoin(n, t)
or
exists(TypeFlowNode scc |
sccRepr(n, scc) and
// Optimized version of
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | exactType(mid, t))`
ForAll<RankedSccJoinStep, ExactTypePropagation>::flowJoin(scc, t)
)
}

/**
Expand Down Expand Up @@ -343,30 +509,15 @@ private predicate typeFlowBase(TypeFlowNode n, RefType t) {
)
}

/**
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
* thus is a candidate bound for `n`.
*/
pragma[noinline]
private predicate typeFlowJoinCand(TypeFlowNode n, RefType t) {
exists(TypeFlowNode mid | joinStep(mid, n) | typeFlow(mid, t))
}
private module TypeFlowPropagation implements TypePropagation {
predicate candType = typeFlow/2;

/**
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
* through the edges into `n` ranked from `1` to `r`.
*/
private predicate typeFlowJoin(int r, TypeFlowNode n, RefType t) {
(
r = 1 and typeFlowJoinCand(n, t)
or
typeFlowJoin(r - 1, n, t) and joinStepRank(r, _, n)
) and
forall(TypeFlowNode mid | joinStepRank(r, mid, n) |
bindingset[t]
predicate supportsType(TypeFlowNode mid, RefType t) {
exists(RefType midtyp | exactType(mid, midtyp) or typeFlow(mid, midtyp) |
pragma[only_bind_out](midtyp).getAnAncestor() = t
)
)
}
}

/**
Expand All @@ -378,7 +529,12 @@ private predicate typeFlow(TypeFlowNode n, RefType t) {
or
exists(TypeFlowNode mid | typeFlow(mid, t) and step(mid, n))
or
typeFlowJoin(lastRank(n), n, t)
ForAll<RankedJoinStep, TypeFlowPropagation>::flowJoin(n, t)
or
exists(TypeFlowNode scc |
sccRepr(n, scc) and
ForAll<RankedSccJoinStep, TypeFlowPropagation>::flowJoin(scc, t)
)
}

pragma[nomagic]
Expand Down