Skip to content

Limit recursion in regex parser #757

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions Sources/_RegexParser/Regex/Parse/Diagnostics.swift
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ enum ParseError: Error, Hashable {

case expectedCalloutArgument

// Excessively nested groups or custom character classes (i.e. the parser's recursion limit was exceeded)
case nestingTooDeep

// MARK: Semantic Errors

case unsupported(String)
Expand Down Expand Up @@ -241,6 +244,9 @@ extension ParseError: CustomStringConvertible {
return "character '\(lhs)' must compare less than or equal to '\(rhs)'"
case .notQuantifiable:
return "expression is not quantifiable"

case .nestingTooDeep:
return "group is too deeply nested"
}
}
}
Expand Down Expand Up @@ -302,25 +308,39 @@ extension Diagnostic {
public struct Diagnostics: Hashable {
public private(set) var diags = [Diagnostic]()

// In the event of an unrecoverable parse error, set this
// to avoid emitting spurious diagnostics.
internal var suppressFurtherDiagnostics = false

/// Creates an empty diagnostic collection.
public init() {}
/// Creates a diagnostic collection holding the given diagnostics.
public init(_ diags: [Diagnostic]) {
self.diags = diags
}

/// Add a new diagnostic to emit.
///
/// The diagnostic is dropped when `suppressFurtherDiagnostics` is set.
public mutating func append(_ diag: Diagnostic) {
  if !suppressFurtherDiagnostics {
    diags.append(diag)
  }
}

/// Add all the diagnostics of another diagnostic collection.
///
/// Nothing is added when `suppressFurtherDiagnostics` is set.
public mutating func append(contentsOf other: Diagnostics) {
  if suppressFurtherDiagnostics { return }
  diags += other.diags
}

/// Add all the new fatal error diagnostics of another diagnostic collection.
/// This assumes that `other` was the same as `self`, but may have additional
/// diagnostics added to it.
public mutating func appendNewFatalErrors(from other: Diagnostics) {
guard !suppressFurtherDiagnostics else {
return
}

let newDiags = other.diags.dropFirst(diags.count)
for diag in newDiags where diag.behavior == .fatalError {
append(diag)
Expand Down
32 changes: 32 additions & 0 deletions Sources/_RegexParser/Regex/Parse/Parse.swift
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ struct ParsingContext {
/// A set of used group names.
private var usedGroupNames = Set<String>()

/// The current nesting depth of recursive parser calls. It is incremented on
/// entry to `parseNode` and `parseCustomCharacterClass` and decremented again
/// on exit, so inside either call it equals the recursion depth plus 1.
fileprivate var parseDepth = 0

/// The syntax options currently set.
fileprivate(set) var syntax: SyntaxOptions

Expand All @@ -88,6 +91,8 @@ struct ParsingContext {
}
}

/// The exclusive upper bound on `parseDepth`: once `parseDepth` reaches this
/// value, parsing stops with a `.nestingTooDeep` error.
fileprivate var maxParseDepth: Int {
  return 64
}

/// Creates a parsing context whose initial syntax options are `syntax`.
init(syntax: SyntaxOptions) {
self.syntax = syntax
}
Expand Down Expand Up @@ -188,6 +193,20 @@ extension Parser {
/// Alternation -> Concatenation ('|' Concatenation)*
///
mutating func parseNode() -> AST.Node {
// Excessively nested groups are a common DoS attack vector, so limit
// our recursion.
context.parseDepth += 1
defer { context.parseDepth -= 1 }
guard context.parseDepth < context.maxParseDepth else {
self.errorAtCurrentPosition(.nestingTooDeep)

// This is not generally recoverable and further errors will be
// incorrect
diags.suppressFurtherDiagnostics = true

return .empty(.init(loc(src.currentPosition)))
}

let _start = src.currentPosition

if src.isEmpty { return .empty(.init(loc(_start))) }
Expand Down Expand Up @@ -504,6 +523,19 @@ extension Parser {
mutating func parseCustomCharacterClass(
_ start: Source.Located<CustomCC.Start>
) -> CustomCC {
// Excessively nested character classes are a common DoS attack vector,
// so limit our recursion.
context.parseDepth += 1
defer { context.parseDepth -= 1 }
guard context.parseDepth < context.maxParseDepth else {
self.errorAtCurrentPosition(.nestingTooDeep)

// This is not generally recoverable and further errors will be
// incorrect
diags.suppressFurtherDiagnostics = true
return .init(start, [], start.location)
}

let alreadyInCCC = context.isInCustomCharacterClass
context.isInCustomCharacterClass = true
defer { context.isInCustomCharacterClass = alreadyInCCC }
Expand Down
22 changes: 22 additions & 0 deletions Tests/RegexTests/ParseTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3322,6 +3322,28 @@ extension RegexTests {
diagnosticTest("(*LIMIT_DEPTH=-1", .expectedNumber("", kind: .decimal), .expected(")"), unsupported: true)
}

func testMaliciousNesting() {
  // Excessively nested subpatterns are a common DoS attack vector. Each
  // opener/closer pair below is nested well beyond the parser's recursion
  // limit and is expected to be diagnosed as `.nestingTooDeep`.
  let nestingCount = 500
  let delimiterPairs = [
    ("(", ")*"),    // capturing groups
    ("(?:", ")*"),  // non-capturing groups
    ("[", "]*"),    // custom character classes
  ]

  for (opener, closer) in delimiterPairs {
    let prefix = String(repeating: opener, count: nestingCount)
    let suffix = String(repeating: closer, count: nestingCount)
    diagnosticTest(prefix + "a" + suffix, .nestingTooDeep)
  }
}

func testDelimiterLexingErrors() {

// MARK: Printable ASCII
Expand Down