Skip to content

Commit b84dca9

Browse files
authored
Merge pull request #10240 from aschackmull/java/scc-typeflow
Java: Support SCCs in TypeFlow.
2 parents d1867b9 + bc57d87 commit b84dca9

File tree

1 file changed

+200
-44
lines changed

1 file changed

+200
-44
lines changed

java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll

Lines changed: 200 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,16 @@ private class TypeFlowNode extends TTypeFlowNode {
5353
}
5454
}
5555

56+
private int getNodeKind(TypeFlowNode n) {
57+
result = 1 and n instanceof TField
58+
or
59+
result = 2 and n instanceof TSsa
60+
or
61+
result = 3 and n instanceof TExpr
62+
or
63+
result = 4 and n instanceof TMethod
64+
}
65+
5666
/** Gets `t` if it is a `RefType` or the boxed type if `t` is a primitive type. */
5767
private RefType boxIfNeeded(Type t) {
5868
t.(PrimitiveType).getBoxedType() = result or
@@ -146,27 +156,181 @@ private predicate joinStep(TypeFlowNode n1, TypeFlowNode n2) {
146156
joinStep0(n1, n2) and not isNull(n1)
147157
}
148158

149-
private predicate joinStepRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
150-
n1 =
151-
rank[r](TypeFlowNode n |
152-
joinStep(n, n2)
153-
|
154-
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn()
159+
private predicate anyStep(TypeFlowNode n1, TypeFlowNode n2) { joinStep(n1, n2) or step(n1, n2) }
160+
161+
private import SccReduction
162+
163+
/**
164+
* SCC reduction.
165+
*
166+
* This ought to be as easy as `equivalenceRelation(sccEdge/2)(n, scc)`, but
167+
* this HOP is not currently supported for newtypes.
168+
*
169+
* A straightforward implementation would be:
170+
* ```ql
171+
* predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
172+
* scc =
173+
* max(TypeFlowNode n2 |
174+
* sccEdge+(n, n2)
175+
* |
176+
* n2
177+
* order by
178+
* n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
179+
* )
180+
* }
181+
*
182+
* ```
183+
* but this is quadratic in the size of the SCCs.
184+
*
185+
* Instead we find local maxima by following SCC edges and determine the SCC
186+
* representatives from those.
187+
* (This is still worst-case quadratic in the size of the SCCs, but generally
188+
* performs better.)
189+
*/
190+
private module SccReduction {
191+
private predicate sccEdge(TypeFlowNode n1, TypeFlowNode n2) {
192+
anyStep(n1, n2) and anyStep+(n2, n1)
193+
}
194+
195+
private predicate sccEdgeWithMax(TypeFlowNode n1, TypeFlowNode n2, TypeFlowNode m) {
196+
sccEdge(n1, n2) and
197+
m =
198+
max(TypeFlowNode n |
199+
n = [n1, n2]
200+
|
201+
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn(), getNodeKind(n)
202+
)
203+
}
204+
205+
private predicate hasLargerNeighbor(TypeFlowNode n) {
206+
exists(TypeFlowNode n2 |
207+
sccEdgeWithMax(n, n2, n2) and
208+
not sccEdgeWithMax(n, n2, n)
209+
or
210+
sccEdgeWithMax(n2, n, n2) and
211+
not sccEdgeWithMax(n2, n, n)
155212
)
213+
}
214+
215+
private predicate localMax(TypeFlowNode m) {
216+
sccEdgeWithMax(_, _, m) and
217+
not hasLargerNeighbor(m)
218+
}
219+
220+
private predicate sccReprFromLocalMax(TypeFlowNode scc) {
221+
exists(TypeFlowNode m |
222+
localMax(m) and
223+
scc =
224+
max(TypeFlowNode n2 |
225+
sccEdge+(m, n2) and localMax(n2)
226+
|
227+
n2
228+
order by
229+
n2.getLocation().getStartLine(), n2.getLocation().getStartColumn(), getNodeKind(n2)
230+
)
231+
)
232+
}
233+
234+
/** Holds if `n` is part of an SCC of size 2 or more represented by `scc`. */
235+
predicate sccRepr(TypeFlowNode n, TypeFlowNode scc) {
236+
sccEdge+(n, scc) and sccReprFromLocalMax(scc)
237+
}
238+
239+
predicate sccJoinStep(TypeFlowNode n, TypeFlowNode scc) {
240+
exists(TypeFlowNode mid |
241+
joinStep(n, mid) and
242+
sccRepr(mid, scc) and
243+
not sccRepr(n, scc)
244+
)
245+
}
156246
}
157247

158-
private predicate joinStepRank2(int r2, int r1, TypeFlowNode n) {
159-
r1 = rank[r2](int r | joinStepRank1(r, _, n) | r)
248+
private signature predicate edgeSig(TypeFlowNode n1, TypeFlowNode n2);
249+
250+
private signature module RankedEdge {
251+
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2);
252+
253+
int lastRank(TypeFlowNode n);
160254
}
161255

162-
private predicate joinStepRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
163-
exists(int r1 |
164-
joinStepRank1(r1, n1, n2) and
165-
joinStepRank2(r, r1, n2)
166-
)
256+
private module RankEdge<edgeSig/2 edge> implements RankedEdge {
257+
/**
258+
* Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. The used
259+
* ordering is not necessarily total, so the ranking may have gaps.
260+
*/
261+
private predicate edgeRank1(int r, TypeFlowNode n1, TypeFlowNode n2) {
262+
n1 =
263+
rank[r](TypeFlowNode n |
264+
edge(n, n2)
265+
|
266+
n order by n.getLocation().getStartLine(), n.getLocation().getStartColumn()
267+
)
268+
}
269+
270+
/**
271+
* Holds if `r2` is a ranking of the ranks from `edgeRank1`. This removes the
272+
* gaps from the ranking.
273+
*/
274+
private predicate edgeRank2(int r2, int r1, TypeFlowNode n) {
275+
r1 = rank[r2](int r | edgeRank1(r, _, n) | r)
276+
}
277+
278+
/** Holds if `r` is a ranking of the incoming edges `(n1,n2)` to `n2`. */
279+
predicate edgeRank(int r, TypeFlowNode n1, TypeFlowNode n2) {
280+
exists(int r1 |
281+
edgeRank1(r1, n1, n2) and
282+
edgeRank2(r, r1, n2)
283+
)
284+
}
285+
286+
int lastRank(TypeFlowNode n) { result = max(int r | edgeRank(r, _, n)) }
287+
}
288+
289+
private signature module TypePropagation {
290+
predicate candType(TypeFlowNode n, RefType t);
291+
292+
bindingset[t]
293+
predicate supportsType(TypeFlowNode n, RefType t);
294+
}
295+
296+
/** Implements recursion through `forall` by way of edge ranking. */
297+
private module ForAll<RankedEdge Edge, TypePropagation T> {
298+
/**
299+
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
300+
* thus is a candidate bound for `n`.
301+
*/
302+
pragma[nomagic]
303+
private predicate candJoinType(TypeFlowNode n, RefType t) {
304+
exists(TypeFlowNode mid |
305+
T::candType(mid, t) and
306+
Edge::edgeRank(_, mid, n)
307+
)
308+
}
309+
310+
/**
311+
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
312+
* through the edges into `n` ranked from `1` to `r`.
313+
*/
314+
private predicate flowJoin(int r, TypeFlowNode n, RefType t) {
315+
(
316+
r = 1 and candJoinType(n, t)
317+
or
318+
flowJoin(r - 1, n, t) and Edge::edgeRank(r, _, n)
319+
) and
320+
forall(TypeFlowNode mid | Edge::edgeRank(r, mid, n) | T::supportsType(mid, t))
321+
}
322+
323+
/**
324+
* Holds if `t` is a candidate bound for `n` that is also valid for data
325+
* coming through all the incoming edges, and therefore is a valid bound for
326+
* `n`.
327+
*/
328+
predicate flowJoin(TypeFlowNode n, RefType t) { flowJoin(Edge::lastRank(n), n, t) }
167329
}
168330

169-
private int lastRank(TypeFlowNode n) { result = max(int r | joinStepRank(r, _, n)) }
331+
module RankedJoinStep = RankEdge<joinStep/2>;
332+
333+
module RankedSccJoinStep = RankEdge<sccJoinStep/2>;
170334

171335
private predicate exactTypeBase(TypeFlowNode n, RefType t) {
172336
exists(ClassInstanceExpr e |
@@ -177,15 +341,10 @@ private predicate exactTypeBase(TypeFlowNode n, RefType t) {
177341
)
178342
}
179343

180-
private predicate exactTypeRank(int r, TypeFlowNode n, RefType t) {
181-
forall(TypeFlowNode mid | joinStepRank(r, mid, n) | exactType(mid, t)) and
182-
joinStepRank(r, _, n)
183-
}
344+
private module ExactTypePropagation implements TypePropagation {
345+
predicate candType = exactType/2;
184346

185-
private predicate exactTypeJoin(int r, TypeFlowNode n, RefType t) {
186-
exactTypeRank(1, n, t) and r = 1
187-
or
188-
exactTypeJoin(r - 1, n, t) and exactTypeRank(r, n, t)
347+
predicate supportsType = exactType/2;
189348
}
190349

191350
/**
@@ -199,7 +358,14 @@ private predicate exactType(TypeFlowNode n, RefType t) {
199358
or
200359
// The following is an optimized version of
201360
// `forex(TypeFlowNode mid | joinStep(mid, n) | exactType(mid, t))`
202-
exactTypeJoin(lastRank(n), n, t)
361+
ForAll<RankedJoinStep, ExactTypePropagation>::flowJoin(n, t)
362+
or
363+
exists(TypeFlowNode scc |
364+
sccRepr(n, scc) and
365+
// Optimized version of
366+
// `forex(TypeFlowNode mid | sccJoinStep(mid, scc) | exactType(mid, t))`
367+
ForAll<RankedSccJoinStep, ExactTypePropagation>::flowJoin(scc, t)
368+
)
203369
}
204370

205371
/**
@@ -343,30 +509,15 @@ private predicate typeFlowBase(TypeFlowNode n, RefType t) {
343509
)
344510
}
345511

346-
/**
347-
* Holds if `t` is a bound that holds on one of the incoming edges to `n` and
348-
* thus is a candidate bound for `n`.
349-
*/
350-
pragma[noinline]
351-
private predicate typeFlowJoinCand(TypeFlowNode n, RefType t) {
352-
exists(TypeFlowNode mid | joinStep(mid, n) | typeFlow(mid, t))
353-
}
512+
private module TypeFlowPropagation implements TypePropagation {
513+
predicate candType = typeFlow/2;
354514

355-
/**
356-
* Holds if `t` is a candidate bound for `n` that is also valid for data coming
357-
* through the edges into `n` ranked from `1` to `r`.
358-
*/
359-
private predicate typeFlowJoin(int r, TypeFlowNode n, RefType t) {
360-
(
361-
r = 1 and typeFlowJoinCand(n, t)
362-
or
363-
typeFlowJoin(r - 1, n, t) and joinStepRank(r, _, n)
364-
) and
365-
forall(TypeFlowNode mid | joinStepRank(r, mid, n) |
515+
bindingset[t]
516+
predicate supportsType(TypeFlowNode mid, RefType t) {
366517
exists(RefType midtyp | exactType(mid, midtyp) or typeFlow(mid, midtyp) |
367518
pragma[only_bind_out](midtyp).getAnAncestor() = t
368519
)
369-
)
520+
}
370521
}
371522

372523
/**
@@ -378,7 +529,12 @@ private predicate typeFlow(TypeFlowNode n, RefType t) {
378529
or
379530
exists(TypeFlowNode mid | typeFlow(mid, t) and step(mid, n))
380531
or
381-
typeFlowJoin(lastRank(n), n, t)
532+
ForAll<RankedJoinStep, TypeFlowPropagation>::flowJoin(n, t)
533+
or
534+
exists(TypeFlowNode scc |
535+
sccRepr(n, scc) and
536+
ForAll<RankedSccJoinStep, TypeFlowPropagation>::flowJoin(scc, t)
537+
)
382538
}
383539

384540
pragma[nomagic]

0 commit comments

Comments
 (0)