Skip to content

Automodel: Apply negative characteristics only to endpoints of the right kind. #15326

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 90 additions & 97 deletions java/ql/automodel/src/AutomodelApplicationModeCharacteristics.qll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -25,20 +25,20 @@ private import AutomodelJavaUtil
bindingset[limit]
private Endpoint getSampleForSignature(
int limit, string package, string type, string subtypes, string name, string signature,
string input, string output, string isVarargs, string extensibleType
string input, string output, string isVarargs, string extensibleType, string alreadyAiModeled
) {
exists(int n, int num_endpoints, ApplicationModeMetadataExtractor meta |
num_endpoints =
count(Endpoint e |
e.getExtensibleType() = extensibleType and
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType)
)
|
result =
rank[n](Endpoint e, Location loc |
loc = e.asTop().getLocation() and
e.getExtensibleType() = extensibleType and
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs)
meta.hasMetadata(e, package, type, subtypes, name, signature, input, output, isVarargs,
alreadyAiModeled, extensibleType)
|
e
order by
Expand All @@ -63,22 +63,18 @@ where
not exists(CharacteristicsImpl::UninterestingToModelCharacteristic u |
u.appliesToEndpoint(endpoint)
) and
CharacteristicsImpl::isSinkCandidate(endpoint, _) and
CharacteristicsImpl::isCandidate(endpoint, _) and
endpoint =
getSampleForSignature(9, package, type, subtypes, name, signature, input, output,
isVarargsArray, extensibleType) and
isVarargsArray, extensibleType, alreadyAiModeled) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, alreadyAiModeled, extensibleType) and
// If a node is already modeled in MaD, we don't include it as a candidate. Otherwise, we might include it as a
// candidate for query A, but the model will label it as a sink for one of the sink types of query B, for which it's
// already a known sink. This would result in overlap between our detected sinks and the pre-existing modeling. We
// assume that, if a sink has already been modeled in a MaD model, then it doesn't belong to any additional sink
// types, and we don't need to reexamine it.
(
not CharacteristicsImpl::isModeled(endpoint, _, _, _) and alreadyAiModeled = ""
or
alreadyAiModeled.matches("%ai-%") and
CharacteristicsImpl::isModeled(endpoint, _, _, alreadyAiModeled)
) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
alreadyAiModeled.matches(["", "%ai-%"]) and
includeAutomodelCandidate(package, type, name, signature)
select endpoint.asNode(),
"Related locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,27 +40,45 @@ Endpoint getSampleForCharacteristic(EndpointCharacteristic c, int limit) {
)
}

predicate candidate(
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
string type, string subtypes, string name, string signature, string input, string output,
string isVarargsArray, string extensibleType
) {
// the node is known not to be an endpoint of any appropriate type
forall(EndpointType tp | tp = CharacteristicsImpl::getAPotentialType(endpoint) |
characteristic.hasImplications(tp, false, _)
) and
// the lowest confidence across all endpoint types should be at least highConfidence
confidence =
min(float c |
characteristic.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, c)
) and
confidence >= SharedCharacteristics::highConfidence() and
any(ApplicationModeMetadataExtractor meta)
.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
// It's valid for a node to be both a potential source/sanitizer and a sink. We don't want to include such nodes
// as negative examples in the prompt, because they're ambiguous and might confuse the model, so we explicitly exclude them here.
not exists(EndpointCharacteristic characteristic2, float confidence2 |
characteristic2 != characteristic
|
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2
.hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true, confidence2)
)
}

from
Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string message,
ApplicationModeMetadataExtractor meta, DollarAtString package, DollarAtString type,
DollarAtString subtypes, DollarAtString name, DollarAtString signature, DollarAtString input,
DollarAtString output, DollarAtString isVarargsArray, DollarAtString extensibleType
DollarAtString package, DollarAtString type, DollarAtString subtypes, DollarAtString name,
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString isVarargsArray, DollarAtString extensibleType
where
endpoint = getSampleForCharacteristic(characteristic, 100) and
extensibleType = endpoint.getExtensibleType() and
confidence >= SharedCharacteristics::highConfidence() and
characteristic.hasImplications(any(NegativeSinkType negative), true, confidence) and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
// It's valid for a node to satisfy the logic for both `isSink` and `isSanitizer`, but in that case it will be
// treated by the actual query as a sanitizer, since the final logic is something like
// `isSink(n) and not isSanitizer(n)`. We don't want to include such nodes as negative examples in the prompt, because
// they're ambiguous and might confuse the model, so we explicitly exclude all known sinks from the negative examples.
not exists(EndpointCharacteristic characteristic2, float confidence2, SinkType positiveType |
not positiveType instanceof NegativeSinkType and
characteristic2.appliesToEndpoint(endpoint) and
confidence2 >= SharedCharacteristics::maximalConfidence() and
characteristic2.hasImplications(positiveType, true, confidence2)
) and
candidate(endpoint, characteristic, confidence, package, type, subtypes, name, signature, input,
output, isVarargsArray, extensibleType) and
message = characteristic
select endpoint.asNode(),
message + "\nrelated locations: $@, $@, $@." + "\nmetadata: $@, $@, $@, $@, $@, $@, $@, $@, $@.", //
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,8 @@ from
DollarAtString signature, DollarAtString input, DollarAtString output,
DollarAtString isVarargsArray, DollarAtString extensibleType
where
extensibleType = endpoint.getExtensibleType() and
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output, isVarargsArray) and
// Extract positive examples of sinks belonging to the existing ATM query configurations.
meta.hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
isVarargsArray, _, extensibleType) and
CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
exists(CharacteristicsImpl::getRelatedLocationOrCandidate(endpoint, CallContext()))
select endpoint.asNode(),
Expand Down
5 changes: 0 additions & 5 deletions java/ql/automodel/src/AutomodelEndpointTypes.qll
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ abstract class SinkType extends EndpointType {
SinkType() { any() }
}

/** The `Negative` class for non-sinks. */
class NegativeSinkType extends SinkType {
NegativeSinkType() { this = "non-sink" }
}

/** A sink relevant to the SQL injection query */
class SqlInjectionSinkType extends SinkType {
SqlInjectionSinkType() { this = "sql-injection" }
Expand Down
112 changes: 75 additions & 37 deletions java/ql/automodel/src/AutomodelFrameworkModeCharacteristics.qll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ private import semmle.code.java.security.QueryInjection
private import semmle.code.java.security.RequestForgery
private import semmle.code.java.dataflow.internal.ModelExclusions as ModelExclusions
private import AutomodelJavaUtil as AutomodelJavaUtil
private import AutomodelSharedGetCallable as AutomodelSharedGetCallable
import AutomodelSharedCharacteristics as SharedCharacteristics
import AutomodelEndpointTypes as AutomodelEndpointTypes

Expand Down Expand Up @@ -84,7 +83,7 @@ abstract class FrameworkModeEndpoint extends TFrameworkModeEndpoint {
/**
* Returns the callable that contains the endpoint.
*/
abstract Callable getEnclosingCallable();
abstract Callable getCallable();

abstract Top asTop();

Expand All @@ -106,7 +105,7 @@ class ExplicitParameterEndpoint extends FrameworkModeEndpoint, TExplicitParamete

override string getParamName() { result = param.getName() }

override Callable getEnclosingCallable() { result = param.getCallable() }
override Callable getCallable() { result = param.getCallable() }

override Top asTop() { result = param }

Expand All @@ -126,7 +125,7 @@ class QualifierEndpoint extends FrameworkModeEndpoint, TQualifier {

override string getParamName() { result = "this" }

override Callable getEnclosingCallable() { result = callable }
override Callable getCallable() { result = callable }

override Top asTop() { result = callable }

Expand All @@ -144,7 +143,7 @@ class ReturnValue extends FrameworkModeEndpoint, TReturnValue {

override string getParamName() { none() }

override Callable getEnclosingCallable() { result = callable }
override Callable getCallable() { result = callable }

override Top asTop() { result = callable }

Expand All @@ -163,7 +162,7 @@ class OverridableParameter extends FrameworkModeEndpoint, TOverridableParameter

override string getParamName() { result = param.getName() }

override Callable getEnclosingCallable() { result = method }
override Callable getCallable() { result = method }

override Top asTop() { result = param }

Expand All @@ -181,7 +180,7 @@ class OverridableQualifier extends FrameworkModeEndpoint, TOverridableQualifier

override string getParamName() { result = "this" }

override Callable getEnclosingCallable() { result = m }
override Callable getCallable() { result = m }

override Top asTop() { result = m }

Expand All @@ -202,7 +201,9 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {

class EndpointType = AutomodelEndpointTypes::EndpointType;

class NegativeEndpointType = AutomodelEndpointTypes::NegativeSinkType;
class SinkType = AutomodelEndpointTypes::SinkType;

class SourceType = AutomodelEndpointTypes::SourceType;

class RelatedLocation = Location::Top;

Expand Down Expand Up @@ -244,8 +245,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
additional predicate sinkSpec(
Endpoint e, string package, string type, string name, string signature, string ext, string input
) {
e.getEnclosingCallable().hasQualifiedName(package, type, name) and
signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
e.getCallable().hasQualifiedName(package, type, name) and
signature = ExternalFlow::paramsString(e.getCallable()) and
ext = "" and
input = e.getMaDInput()
}
Expand All @@ -254,8 +255,8 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
Endpoint e, string package, string type, string name, string signature, string ext,
string output
) {
e.getEnclosingCallable().hasQualifiedName(package, type, name) and
signature = ExternalFlow::paramsString(e.getEnclosingCallable()) and
e.getCallable().hasQualifiedName(package, type, name) and
signature = ExternalFlow::paramsString(e.getCallable()) and
ext = "" and
output = e.getMaDOutput()
}
Expand All @@ -267,10 +268,10 @@ module FrameworkCandidatesImpl implements SharedCharacteristics::CandidateSig {
*/
RelatedLocation getRelatedLocation(Endpoint e, RelatedLocationType type) {
type = MethodDoc() and
result = e.getEnclosingCallable().(Documentable).getJavadoc()
result = e.getCallable().(Documentable).getJavadoc()
or
type = ClassDoc() and
result = e.getEnclosingCallable().getDeclaringType().(Documentable).getJavadoc()
result = e.getCallable().getDeclaringType().(Documentable).getJavadoc()
}
}

Expand All @@ -292,16 +293,27 @@ class FrameworkModeMetadataExtractor extends string {

predicate hasMetadata(
Endpoint e, string package, string type, string subtypes, string name, string signature,
string input, string output, string parameterName
string input, string output, string parameterName, string alreadyAiModeled,
string extensibleType
) {
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
name = e.getEnclosingCallable().getName() and
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
package = e.getEnclosingCallable().getDeclaringType().getPackage().getName() and
type = e.getEnclosingCallable().getDeclaringType().getErasure().(RefType).nestedName() and
subtypes = AutomodelJavaUtil::considerSubtypes(e.getEnclosingCallable()).toString() and
signature = ExternalFlow::paramsString(e.getEnclosingCallable())
exists(Callable callable | e.getCallable() = callable |
(if exists(e.getMaDInput()) then input = e.getMaDInput() else input = "") and
(if exists(e.getMaDOutput()) then output = e.getMaDOutput() else output = "") and
package = callable.getDeclaringType().getPackage().getName() and
// we're using the erased types because the MaD convention is to not specify type parameters.
// Whether something is or isn't a sink doesn't usually depend on the type parameters.
type = callable.getDeclaringType().getErasure().(RefType).nestedName() and
subtypes = AutomodelJavaUtil::considerSubtypes(callable).toString() and
name = callable.getName() and
signature = ExternalFlow::paramsString(callable) and
(if exists(e.getParamName()) then parameterName = e.getParamName() else parameterName = "") and
e.getExtensibleType() = extensibleType
) and
(
not CharacteristicsImpl::isModeled(e, _, extensibleType, _) and alreadyAiModeled = ""
or
CharacteristicsImpl::isModeled(e, _, extensibleType, alreadyAiModeled)
)
}
}

Expand All @@ -310,53 +322,79 @@ class FrameworkModeMetadataExtractor extends string {
*/

/**
* A negative characteristic that indicates that an is-style boolean method is unexploitable even if it is a sink.
* A negative characteristic that indicates that parameters of an is-style boolean method should not be considered sinks,
* and its return value should not be considered a source.
*
* A sink is highly unlikely to be exploitable if its callable's name starts with `is` and the callable has a boolean return
* type (e.g. `isDirectory`). These kinds of calls normally do only checks, and appear before the proper call that does
* the dangerous/interesting thing, so we want the latter to be modeled as the sink.
*
* TODO: this might filter too much, it's possible that methods with more than one parameter contain interesting sinks
*/
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
private class UnexploitableIsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
UnexploitableIsCharacteristic() { this = "unexploitable (is-style boolean method)" }

override predicate appliesToEndpoint(Endpoint e) {
not FrameworkCandidatesImpl::isSink(e, _, _) and
e.getEnclosingCallable().getName().matches("is%") and
e.getEnclosingCallable().getReturnType() instanceof BooleanType
e.getCallable().getName().matches("is%") and
e.getCallable().getReturnType() instanceof BooleanType and
(
e.getExtensibleType() = "sinkModel" and
not FrameworkCandidatesImpl::isSink(e, _, _)
or
e.getExtensibleType() = "sourceModel" and
not FrameworkCandidatesImpl::isSource(e, _, _) and
e.getMaDOutput() = "ReturnValue"
)
}
}

/**
* A negative characteristic that indicates that an existence-checking boolean method is unexploitable even if it is a
* sink.
* A negative characteristic that indicates that parameters of an existence-checking boolean method should not be
* considered sinks, and its return value should not be considered a source.
*
* A sink is highly unlikely to be exploitable if its callable's name is `exists` or `notExists` and the callable has a
* boolean return type. These kinds of calls normally do only checks, and appear before the proper call that does the
* dangerous/interesting thing, so we want the latter to be modeled as the sink.
*/
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
private class UnexploitableExistsCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
UnexploitableExistsCharacteristic() { this = "unexploitable (existence-checking boolean method)" }

override predicate appliesToEndpoint(Endpoint e) {
not FrameworkCandidatesImpl::isSink(e, _, _) and
exists(Callable callable |
callable = e.getEnclosingCallable() and
callable = e.getCallable() and
callable.getName().toLowerCase() = ["exists", "notexists"] and
callable.getReturnType() instanceof BooleanType
|
e.getExtensibleType() = "sinkModel" and
not FrameworkCandidatesImpl::isSink(e, _, _)
or
e.getExtensibleType() = "sourceModel" and
not FrameworkCandidatesImpl::isSource(e, _, _) and
e.getMaDOutput() = "ReturnValue"
)
}
}

/**
* A negative characteristic that indicates that an endpoint is an argument to an exception, which is not a sink.
* A negative characteristic that indicates that parameters of an exception method or constructor should not be considered sinks,
* and its return value should not be considered a source.
*/
private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
private class ExceptionCharacteristic extends CharacteristicsImpl::NeitherSourceNorSinkCharacteristic
{
ExceptionCharacteristic() { this = "exception" }

override predicate appliesToEndpoint(Endpoint e) {
e.getEnclosingCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable
e.getCallable().getDeclaringType().getASupertype*() instanceof TypeThrowable and
(
e.getExtensibleType() = "sinkModel" and
not FrameworkCandidatesImpl::isSink(e, _, _)
or
e.getExtensibleType() = "sourceModel" and
not FrameworkCandidatesImpl::isSource(e, _, _) and
e.getMaDOutput() = "ReturnValue"
)
}
}

Expand All @@ -368,6 +406,6 @@ private class NotAModelApi extends CharacteristicsImpl::UninterestingToModelChar
NotAModelApi() { this = "not a model API" }

override predicate appliesToEndpoint(Endpoint e) {
not e.getEnclosingCallable() instanceof ModelExclusions::ModelApi
not e.getCallable() instanceof ModelExclusions::ModelApi
}
}
Loading