Skip to content

Commit 32f5667

Browse files
am0o0hmac
authored andcommitted
revert YAML.qll and yaml sinks to previous PR, make a separate experimental query only for yaml
1 parent c582ea6 commit 32f5667

15 files changed

+564
-154
lines changed

ruby/ql/lib/codeql/ruby/frameworks/Yaml.qll

Lines changed: 10 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,68 +10,26 @@ private import codeql.ruby.ApiGraphs
1010
* A taint step related to the result of `YAML.parse` calls, or similar.
1111
* In the following example, this step will propagate taint from
1212
* `source` to `sink`:
13-
* this contains two seperate steps:
13+
*
1414
* ```rb
1515
* x = source
16-
* sink = YAML.parse(x)
17-
* ```
18-
* By second step
19-
* source is a Successor of `YAML.parse(x)`
20-
* which ends with `to_ruby` or an Element of `to_ruby`
21-
* ```ruby
22-
* sink source.to_ruby # Unsafe call
16+
* result = YAML.parse(x)
17+
* sink result.to_ruby # Unsafe call
2318
* ```
2419
*/
2520
private class YamlParseStep extends AdditionalTaintStep {
2621
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
27-
exists(API::Node yamlParserMethod |
28-
succ = yamlParserMethod.getReturn().asSource() and
22+
exists(DataFlow::CallNode yamlParserMethod |
23+
succ = yamlParserMethod.getAMethodCall("to_ruby") and
2924
(
30-
yamlParserMethod = yamlLibrary().getMethod(["parse", "parse_stream"]) and
31-
pred =
32-
[yamlParserMethod.getParameter(0), yamlParserMethod.getKeywordParameter("yaml")].asSink()
25+
yamlParserMethod = yamlNode().getAMethodCall(["parse", "parse_stream"]) and
26+
pred = [yamlParserMethod.getArgument(0), yamlParserMethod.getKeywordArgument("yaml")]
3327
or
34-
yamlParserMethod = yamlLibrary().getMethod("parse_file") and
35-
pred =
36-
[yamlParserMethod.getParameter(0), yamlParserMethod.getKeywordParameter("filename")]
37-
.asSink()
28+
yamlParserMethod = yamlNode().getAMethodCall("parse_file") and
29+
pred = [yamlParserMethod.getArgument(0), yamlParserMethod.getKeywordArgument("filename")]
3830
)
3931
)
40-
or
41-
exists(API::Node parseSuccessors | parseSuccessors = yamlNode() |
42-
succ =
43-
[
44-
parseSuccessors.getMethod(["to_ruby", "transform"]).getReturn().asSource(),
45-
parseSuccessors.getMethod(["to_ruby", "transform"]).getReturn().getAnElement().asSource()
46-
] and
47-
pred = parseSuccessors.asSource()
48-
)
49-
or
50-
exists(API::Node parseSuccessors | parseSuccessors = yamlNode() |
51-
succ =
52-
[
53-
parseSuccessors.getMethod(_).getBlock().getParameter(_).asSource(),
54-
parseSuccessors.getMethod(_).getReturn().asSource()
55-
] and
56-
pred = parseSuccessors.asSource()
57-
)
5832
}
5933
}
6034

61-
/**
62-
* Gets A Node ends with YAML parse, parse_stream, parse_file methods
63-
*/
64-
API::Node yamlNode() {
65-
result = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"]).getReturn()
66-
or
67-
result = yamlNode().getMethod(_).getReturn()
68-
or
69-
result = yamlNode().getMethod(_).getBlock().getParameter(_)
70-
or
71-
result = yamlNode().getAnElement()
72-
}
73-
74-
/**
75-
* Gets A YAML module instance
76-
*/
77-
API::Node yamlLibrary() { result = API::getTopLevelMember(["YAML", "Psych"]) }
35+
private API::Node yamlNode() { result = API::getTopLevelMember(["YAML", "Psych"]) }

ruby/ql/lib/codeql/ruby/security/UnsafeDeserializationCustomizations.qll

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ private import codeql.ruby.dataflow.RemoteFlowSources
1111
private import codeql.ruby.frameworks.ActiveJob
1212
private import codeql.ruby.frameworks.core.Module
1313
private import codeql.ruby.frameworks.core.Kernel
14-
private import codeql.ruby.frameworks.Yaml
1514

1615
module UnsafeDeserialization {
1716
/**
@@ -83,30 +82,27 @@ module UnsafeDeserialization {
8382
class YamlLoadArgument extends Sink {
8483
YamlLoadArgument() {
8584
// Note: this is safe in psych/yaml >= 4.0.0.
86-
this = yamlLibrary().getAMethodCall("load").getArgument(0)
85+
this = yamlNode().getAMethodCall("load").getArgument(0)
8786
or
8887
this =
89-
yamlLibrary()
90-
.getAMethodCall(["unsafe_load_file", "unsafe_load", "load_stream"])
91-
.getArgument(0)
88+
yamlNode().getAMethodCall(["unsafe_load_file", "unsafe_load", "load_stream"]).getArgument(0)
9289
or
93-
this = yamlLibrary().getAMethodCall(["unsafe_load", "load_stream"]).getKeywordArgument("yaml")
90+
this = yamlNode().getAMethodCall(["unsafe_load", "load_stream"]).getKeywordArgument("yaml")
9491
or
95-
this = yamlLibrary().getAMethodCall("unsafe_load_file").getKeywordArgument("filename")
92+
this = yamlNode().getAMethodCall("unsafe_load_file").getKeywordArgument("filename")
9693
}
9794
}
9895

96+
private API::Node yamlNode() { result = API::getTopLevelMember(["YAML", "Psych"]) }
97+
9998
/**
10099
* An argument in a call to `YAML.parse*`, considered a sink for unsafe deserialization
101-
* if there is a call to `to_ruby` on the returned value of any Successor.
100+
* if there is a call to `to_ruby` on the returned value.
102101
*/
103102
class YamlParseArgument extends Sink {
104103
YamlParseArgument() {
105-
exists(API::Node toRubyReceiver |
106-
toRubyReceiver = yamlNode() and this = toRubyReceiver.asSource()
107-
|
108-
exists(toRubyReceiver.getMethod(["to_ruby", "transform"]))
109-
)
104+
this =
105+
yamlNode().getAMethodCall(["parse", "parse_stream", "parse_file"]).getAMethodCall("to_ruby")
110106
}
111107
}
112108

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<!DOCTYPE qhelp PUBLIC "-//Semmle//qhelp//EN" "qhelp.dtd">
2+
<qhelp>
3+
4+
<overview>
5+
<p>
6+
Deserializing untrusted data using any method that allows the construction of
7+
arbitrary objects is easily exploitable and, in many cases, allows an attacker
8+
to execute arbitrary code.
9+
</p>
10+
</overview>
11+
12+
<recommendation>
13+
14+
<p>
15+
If deserializing an untrusted YAML document using the <code>psych</code> gem,
16+
prefer the <code>safe_load</code> and <code>safe_load_file</code> methods over
17+
<code>load</code> and <code>load_file</code>, as the former will safely
18+
handle untrusted data. Avoid passing untrusted data to the <code>load_stream</code>
19+
method. In <code>psych</code> version 4.0.0 and above, the <code>load</code> method can
20+
safely be used.
21+
</p>
22+
23+
</recommendation>
24+
25+
<example>
26+
<p>
27+
The following example calls the <code>Marshal.load</code>,
28+
<code>JSON.load</code>, <code>YAML.load</code>, and <code>Oj.load</code> methods
29+
on data from an HTTP request. Since these methods are capable of deserializing
30+
to arbitrary objects, this is inherently unsafe.
31+
</p>
32+
<sample src="examples/UnsafeDeserializationBad.rb"/>
33+
34+
<p>
35+
Using <code>JSON.parse</code> and <code>YAML.safe_load</code> instead, as in the
36+
following example, removes the vulnerability. Similarly, calling
37+
<code>Oj.load</code> with any mode other than <code>:object</code> is safe, as
38+
is calling <code>Oj.safe_load</code>. Note that there is no safe way to deserialize
39+
untrusted data using <code>Marshal</code>.
40+
</p>
41+
<sample src="examples/UnsafeDeserializationGood.rb"/>
42+
</example>
43+
44+
<references>
45+
46+
<li>
47+
OWASP vulnerability description:
48+
<a href="https://www.owasp.org/index.php/Deserialization_of_untrusted_data">deserialization of untrusted data</a>.
49+
</li>
50+
<li>
51+
Ruby documentation: <a href="https://docs.ruby-lang.org/en/3.0.0/doc/security_rdoc.html">guidance on deserializing objects safely</a>.
52+
</li>
53+
<li>
54+
Ruby documentation: <a href="https://ruby-doc.org/stdlib-3.0.2/libdoc/yaml/rdoc/YAML.html#module-YAML-label-Security">security guidance on the YAML library</a>.
55+
</li>
56+
<li>
57+
You can read that how unsafe yaml load methods can lead to code executions:
58+
<a href="https://devcraft.io/2021/01/07/universal-deserialisation-gadget-for-ruby-2-x-3-x.html">Universal Deserialisation Gadget for Ruby 2.x-3.x </a>.
59+
</li>
60+
</references>
61+
62+
</qhelp>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* @name Deserialization of user-controlled yaml data
3+
* @description Deserializing user-controlled yaml data may allow attackers to
4+
* execute arbitrary code.
5+
* @kind path-problem
6+
* @problem.severity warning
7+
* @security-severity 9.8
8+
* @precision high
9+
* @id rb/unsafe-unsafeyamldeserialization
10+
* @tags security
11+
* external/cwe/cwe-502
12+
*/
13+
14+
import ruby
15+
import codeql.ruby.security.UnsafeDeserializationQuery
16+
import UnsafeCodeConstructionFlow::PathGraph
17+
18+
from UnsafeCodeConstructionFlow::PathNode source, UnsafeCodeConstructionFlow::PathNode sink
19+
where UnsafeCodeConstructionFlow::flowPath(source, sink)
20+
select sink.getNode(), source, sink, "Unsafe deserialization depends on a $@.", source.getNode(),
21+
source.getNode().(UnsafeDeserialization::Source).describe()
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/**
2+
* Provides a taint-tracking configuration for reasoning about unsafe deserialization.
3+
*
4+
* Note, for performance reasons: only import this file if
5+
* `UnsafeYamlDeserializationFlow` is needed, otherwise
6+
* `UnsafeYamlDeserializationCustomizations` should be imported instead.
7+
*/
8+
9+
private import codeql.ruby.AST
10+
private import codeql.ruby.DataFlow
11+
private import codeql.ruby.TaintTracking
12+
private import codeql.ruby.ApiGraphs
13+
import UnsafeYamlDeserializationCustomizations::UnsafeYamlDeserialization
14+
import Yaml
15+
16+
private module UnsafeYamlDeserializationConfig implements DataFlow::StateConfigSig {
17+
class FlowState = FlowState::State;
18+
19+
predicate isSource(DataFlow::Node source, FlowState state) {
20+
source instanceof Source and
21+
(state instanceof FlowState::Parse or state instanceof FlowState::Load)
22+
}
23+
24+
predicate isSink(DataFlow::Node sink, FlowState state) {
25+
sink instanceof Sink and
26+
(state instanceof FlowState::Parse or state instanceof FlowState::Load)
27+
}
28+
29+
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
30+
31+
/**
32+
* A taint step related to the result of `YAML.parse` calls, or similar.
33+
* In the following example, this step will propagate taint from
34+
* `source` to `sink`:
35+
* this contains two seperate steps:
36+
* ```rb
37+
* x = source
38+
* sink = YAML.parse(x)
39+
* ```
40+
* By second step
41+
* source is a Successor of `YAML.parse(x)`
42+
* which ends with `to_ruby` or an Element of `to_ruby`
43+
* ```ruby
44+
* sink source.to_ruby # Unsafe call
45+
* ```
46+
*/
47+
predicate isAdditionalFlowStep(
48+
DataFlow::Node pred, FlowState stateFrom, DataFlow::Node succ, FlowState stateTo
49+
) {
50+
(
51+
exists(API::Node parseSuccessors, API::Node parseMethod |
52+
parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"]) and
53+
parseSuccessors = yamlParseNode(parseMethod)
54+
|
55+
succ = parseSuccessors.getMethod("to_ruby").getReturn().asSource() and
56+
pred = parseMethod.getArgument(0).asSink()
57+
)
58+
or
59+
exists(API::Node parseMethod |
60+
parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"])
61+
|
62+
succ = parseMethod.getReturn().asSource() and
63+
pred = parseMethod.getArgument(0).asSink()
64+
)
65+
) and
66+
stateFrom instanceof FlowState::Parse and
67+
stateTo instanceof FlowState::Parse
68+
}
69+
}
70+
71+
predicate isAdditionalFlowStepTest(DataFlow::Node pred, DataFlow::Node succ) {
72+
exists(API::Node parseMethod |
73+
parseMethod = yamlLibrary().getMethod(["parse", "parse_stream", "parse_file"])
74+
|
75+
succ = parseMethod.getReturn().asSource() and
76+
pred = parseMethod.getArgument(0).asSink()
77+
)
78+
}
79+
80+
/**
81+
* Taint-tracking for reasoning about unsafe deserialization.
82+
*/
83+
module UnsafeCodeConstructionFlow = TaintTracking::GlobalWithState<UnsafeYamlDeserializationConfig>;

0 commit comments

Comments
 (0)