Skip to content

Commit 3ae4848

Browse files
authored
Merge pull request #15326 from github/max-schaefer/automodel-negative-sink-models
Automodel: Apply negative characteristics only to endpoints of the right kind.
2 parents 4660a25 + 8614d7b commit 3ae4848

17 files changed

+348
-264
lines changed

java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

+90-97
Large diffs are not rendered by default.

java/ql/automodel/src/AutomodelApplicationModeExtractCandidates.ql

+10-14
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@ private import AutomodelJavaUtil
2525
bindingset[limit]
2626
private Endpoint getSampleForSignature(
2727
int limit, string package, string type, string subtypes, string name, string signature,
28-
string input, string output, string isVarargs, string extensibleType
28+
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
2929
) {
3030
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
3131
num_endpoints =
3232
count(Endpoint e |
33-
e.getExtensibleType() = extensibleType and
34-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
33+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
34+
alreadyAiModeled, extensibleType)
3535
)
3636
|
3737
result =
3838
rank[n](Endpoint e, Location loc |
3939
loc = e.asTop().getLocation() and
40-
e.getExtensibleType() = extensibleType and
41-
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
40+
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
41+
alreadyAiModeled, extensibleType)
4242
|
4343
e
4444
order by
@@ -63,22 +63,18 @@ where
6363
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
6464
u.appliesToEndpoint(endpoint)
6565
) and
66-
CharacteristicsImpl::isSinkCandidate(endpoint, _) and
66+
CharacteristicsImpl::isCandidate(endpoint, _) and
6767
endpoint =
6868
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
69-
isVarargsArray, extensibleType) and
69+
isVarargsArray, extensibleType, alreadyAiModeled) and
70+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
71+
isVarargsArray, alreadyAiModeled, extensibleType) and
7072
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
7173
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
7274
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
7375
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
7476
// types, and we don't need to reexamine it.
75-
(
76-
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
77-
or
78-
alreadyAiModeled.matches("%ai-%") and
79-
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
80-
) and
81-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
77+
alreadyAiModeled.matches(["", "%ai-%"]) and
8278
includeAutomodelCandidate(package, type, name, signature)
8379
select endpoint.asNode(),
8480
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelApplicationModeExtractNegativeExamples.ql

+35-17
Original file line numberDiff line numberDiff line change
@@ -40,27 +40,45 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
4040
)
4141
}
4242

43+
predicate candidate(
44+
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
45+
string type, string subtypes, string name, string signature, string input, string output,
46+
string isVarargsArray, string extensibleType
47+
) {
48+
// the node is known not to be an endpoint of any appropriate type
49+
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
50+
characteristic.hasImplications(tp, false, _)
51+
) and
52+
// the lowest confidence across all endpoint types should be at least highConfidence
53+
confidence =
54+
min(float c |
55+
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
56+
) and
57+
confidence >= SharedCharacteristics::highConfidence() and
58+
any(ApplicationModeMetadataExtractor meta)
59+
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
60+
isVarargsArray, _, extensibleType) and
61+
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
62+
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
63+
not exists(EndpointCharacteristic characteristic2, float confidence2 |
64+
characteristic2 != characteristic
65+
|
66+
characteristic2.appliesToEndpoint(endpoint) and
67+
confidence2 >= SharedCharacteristics::maximalConfidence() and
68+
characteristic2
69+
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
70+
)
71+
}
72+
4373
from
4474
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
45-
ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type,
46-
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
47-
DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
75+
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
76+
DollarAtString signature, DollarAtString input, DollarAtString output,
77+
DollarAtString isVarargsArray, DollarAtString extensibleType
4878
where
4979
endpoint = getSampleForCharacteristic(characteristic, 100) and
50-
extensibleType = endpoint.getExtensibleType() and
51-
confidence >= SharedCharacteristics::highConfidence() and
52-
characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
53-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
54-
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
55-
// treated by the actual query as a sanitizer, since the final logic is something like
56-
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
57-
// they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples.
58-
not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType |
59-
not positiveType instanceof NegativeSinkType and
60-
characteristic2.appliesToEndpoint(endpoint) and
61-
confidence2 >= SharedCharacteristics::maximalConfidence() and
62-
characteristic2.hasImplications(positiveType, true, confidence2)
63-
) and
80+
candidate(endpoint, characteristic, confidence, package, type, subtypes, name, signature, input,
81+
output, isVarargsArray, extensibleType) and
6482
message = characteristic
6583
select endpoint.asNode(),
6684
message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //

java/ql/automodel/src/AutomodelApplicationModeExtractPositiveExamples.ql

+2-3
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ from
1818
DollarAtString signature, DollarAtString input, DollarAtString output,
1919
DollarAtString isVarargsArray, DollarAtString extensibleType
2020
where
21-
extensibleType = endpoint.getExtensibleType() and
22-
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
23-
// Extract positive examples of sinks belonging to the existing ATM query configurations.
21+
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
22+
isVarargsArray, _, extensibleType) and
2423
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
2524
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
2625
select endpoint.asNode(),

java/ql/automodel/src/AutomodelEndpointTypes.qll

-5
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,6 @@ abstract class SinkType extends EndpointType {
3030
SinkType() { any() }
3131
}
3232

33-
/** The `Negative` class for non-sinks. */
34-
class NegativeSinkType extends SinkType {
35-
NegativeSinkType() { this = "non-sink" }
36-
}
37-
3833
/** A sink relevant to the SQL injection query */
3934
class SqlInjectionSinkType extends SinkType {
4035
SqlInjectionSinkType() { this = "sql-injection" }

java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll

+75-37
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ private import semmle.code.java.security.QueryInjection
1515
private import semmle.code.java.security.RequestForgery
1616
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
1717
private import AutomodelJavaUtil as AutomodelJavaUtil
18-
private import AutomodelSharedGetCallable as AutomodelSharedGetCallable
1918
import AutomodelSharedCharacteristics as SharedCharacteristics
2019
import AutomodelEndpointTypes as AutomodelEndpointTypes
2120

@@ -84,7 +83,7 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {
8483
/**
8584
* Returns the callable that contains the endpoint.
8685
*/
87-
abstract Callable getEnclosingCallable();
86+
abstract Callable getCallable();
8887

8988
abstract Top asTop();
9089

@@ -106,7 +105,7 @@ class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParamete
106105

107106
override string getParamName() { result = param.getName() }
108107

109-
override Callable getEnclosingCallable() { result = param.getCallable() }
108+
override Callable getCallable() { result = param.getCallable() }
110109

111110
override Top asTop() { result = param }
112111

@@ -126,7 +125,7 @@ class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier {
126125

127126
override string getParamName() { result = "this" }
128127

129-
override Callable getEnclosingCallable() { result = callable }
128+
override Callable getCallable() { result = callable }
130129

131130
override Top asTop() { result = callable }
132131

@@ -144,7 +143,7 @@ class ReturnValue extends FrameworkModeEndpoint, TReturnValue {
144143

145144
override string getParamName() { none() }
146145

147-
override Callable getEnclosingCallable() { result = callable }
146+
override Callable getCallable() { result = callable }
148147

149148
override Top asTop() { result = callable }
150149

@@ -163,7 +162,7 @@ class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter
163162

164163
override string getParamName() { result = param.getName() }
165164

166-
override Callable getEnclosingCallable() { result = method }
165+
override Callable getCallable() { result = method }
167166

168167
override Top asTop() { result = param }
169168

@@ -181,7 +180,7 @@ class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier
181180

182181
override string getParamName() { result = "this" }
183182

184-
override Callable getEnclosingCallable() { result = m }
183+
override Callable getCallable() { result = m }
185184

186185
override Top asTop() { result = m }
187186

@@ -202,7 +201,9 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
202201

203202
class EndpointType = AutomodelEndpointTypes::EndpointType;
204203

205-
class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
204+
class SinkType = AutomodelEndpointTypes::SinkType;
205+
206+
class SourceType = AutomodelEndpointTypes::SourceType;
206207

207208
class RelatedLocation = Location::Top;
208209

@@ -244,8 +245,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
244245
additional predicate sinkSpec(
245246
Endpoint e, string package, string type, string name, string signature, string ext, string input
246247
) {
247-
e.getEnclosingCallable().hasQualifiedName(package, type, name) and
248-
signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
248+
e.getCallable().hasQualifiedName(package, type, name) and
249+
signature = ExternalFlow::paramsString(e.getCallable()) and
249250
ext = "" and
250251
input = e.getMaDInput()
251252
}
@@ -254,8 +255,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
254255
Endpoint e, string package, string type, string name, string signature, string ext,
255256
string output
256257
) {
257-
e.getEnclosingCallable().hasQualifiedName(package, type, name) and
258-
signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
258+
e.getCallable().hasQualifiedName(package, type, name) and
259+
signature = ExternalFlow::paramsString(e.getCallable()) and
259260
ext = "" and
260261
output = e.getMaDOutput()
261262
}
@@ -267,10 +268,10 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
267268
*/
268269
RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
269270
type = MethodDoc() and
270-
result = e.getEnclosingCallable().(Documentable).getJavadoc()
271+
result = e.getCallable().(Documentable).getJavadoc()
271272
or
272273
type = ClassDoc() and
273-
result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc()
274+
result = e.getCallable().getDeclaringType().(Documentable).getJavadoc()
274275
}
275276
}
276277

@@ -292,16 +293,27 @@ class FrameworkModeMetadataExtractor extends string {
292293

293294
predicate hasMetadata(
294295
Endpoint e, string package, string type, string subtypes, string name, string signature,
295-
string input, string output, string parameterName
296+
string input, string output, string parameterName, string alreadyAiModeled,
297+
string extensibleType
296298
) {
297-
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
298-
name = e.getEnclosingCallable().getName() and
299-
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
300-
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
301-
package = e.getEnclosingCallable().getDeclaringType().getPackage().getName() and
302-
type = e.getEnclosingCallable().getDeclaringType().getErasure().(RefType).nestedName() and
303-
subtypes = AutomodelJavaUtil::considerSubtypes(e.getEnclosingCallable()).toString() and
304-
signature = ExternalFlow::paramsString(e.getEnclosingCallable())
299+
exists(Callable callable | e.getCallable() = callable |
300+
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
301+
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
302+
package = callable.getDeclaringType().getPackage().getName() and
303+
// we're using the erased types because the MaD convention is to not specify type parameters.
304+
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
305+
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
306+
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
307+
name = callable.getName() and
308+
signature = ExternalFlow::paramsString(callable) and
309+
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
310+
e.getExtensibleType() = extensibleType
311+
) and
312+
(
313+
not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = ""
314+
or
315+
CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
316+
)
305317
}
306318
}
307319

@@ -310,53 +322,79 @@ class FrameworkModeMetadataExtractor extends string {
310322
*/
311323

312324
/**
313-
* A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink.
325+
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
326+
* and its return value should not be considered a source.
314327
*
315328
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
316329
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
317330
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
318331
*
319332
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
320333
*/
321-
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
334+
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
335+
{
322336
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }
323337

324338
override predicate appliesToEndpoint(Endpoint e) {
325-
not FrameworkCandidatesImpl::isSink(e, _, _) and
326-
e.getEnclosingCallable().getName().matches("is%") and
327-
e.getEnclosingCallable().getReturnType() instanceof BooleanType
339+
e.getCallable().getName().matches("is%") and
340+
e.getCallable().getReturnType() instanceof BooleanType and
341+
(
342+
e.getExtensibleType() = "sinkModel" and
343+
not FrameworkCandidatesImpl::isSink(e, _, _)
344+
or
345+
e.getExtensibleType() = "sourceModel" and
346+
not FrameworkCandidatesImpl::isSource(e, _, _) and
347+
e.getMaDOutput() = "ReturnValue"
348+
)
328349
}
329350
}
330351

331352
/**
332-
* A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a
333-
* sink.
353+
* A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
354+
* considered sinks, and its return value should not be considered a source.
334355
*
335356
* A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
336357
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
337358
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
338359
*/
339-
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
360+
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
361+
{
340362
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }
341363

342364
override predicate appliesToEndpoint(Endpoint e) {
343-
not FrameworkCandidatesImpl::isSink(e, _, _) and
344365
exists(Callable callable |
345-
callable = e.getEnclosingCallable() and
366+
callable = e.getCallable() and
346367
callable.getName().toLowerCase() = ["exists", "notexists"] and
347368
callable.getReturnType() instanceof BooleanType
369+
|
370+
e.getExtensibleType() = "sinkModel" and
371+
not FrameworkCandidatesImpl::isSink(e, _, _)
372+
or
373+
e.getExtensibleType() = "sourceModel" and
374+
not FrameworkCandidatesImpl::isSource(e, _, _) and
375+
e.getMaDOutput() = "ReturnValue"
348376
)
349377
}
350378
}
351379

352380
/**
353-
* A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink.
381+
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
382+
* and its return value should not be considered a source.
354383
*/
355-
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
384+
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
385+
{
356386
ExceptionCharacteristic() { this = "exception" }
357387

358388
override predicate appliesToEndpoint(Endpoint e) {
359-
e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
389+
e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and
390+
(
391+
e.getExtensibleType() = "sinkModel" and
392+
not FrameworkCandidatesImpl::isSink(e, _, _)
393+
or
394+
e.getExtensibleType() = "sourceModel" and
395+
not FrameworkCandidatesImpl::isSource(e, _, _) and
396+
e.getMaDOutput() = "ReturnValue"
397+
)
360398
}
361399
}
362400

@@ -368,6 +406,6 @@ private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelChar
368406
NotAModelApi() { this = "not a model API" }
369407

370408
override predicate appliesToEndpoint(Endpoint e) {
371-
not e.getEnclosingCallable() instanceof ModelExclusions::ModelApi
409+
not e.getCallable() instanceof ModelExclusions::ModelApi
372410
}
373411
}

0 commit comments

Comments
 (0)