Skip to content

[5.7] Recover from parser errors #519

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions Sources/PatternConverter/PatternConverter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ struct PatternConverter: ParsableCommand {
print("Converting '\(delim)\(regex)\(delim)'")

let ast = try _RegexParser.parse(
regex, .semantic,
experimentalSyntax ? .experimental : .traditional)
regex, experimentalSyntax ? .experimental : .traditional)

// Show rendered source ranges
if renderSourceRanges {
Expand Down
42 changes: 35 additions & 7 deletions Sources/_RegexParser/Regex/AST/AST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,31 @@
public struct AST: Hashable {
public var root: AST.Node
public var globalOptions: GlobalMatchingOptionSequence?
public var diags: Diagnostics

public init(_ root: AST.Node, globalOptions: GlobalMatchingOptionSequence?) {
public init(
_ root: AST.Node, globalOptions: GlobalMatchingOptionSequence?,
diags: Diagnostics
) {
self.root = root
self.globalOptions = globalOptions
self.diags = diags
}
}

extension AST {
  /// `true` when at least one capture is nested somewhere inside this AST.
  public var hasCapture: Bool {
    return root.hasCapture
  }

  /// `true` when the diagnostics attached to this AST report any error, i.e.
  /// the AST is syntactically or semantically invalid.
  public var isInvalid: Bool {
    return diags.hasAnyError
  }

  /// Checks that the AST is valid and hands it back unchanged.
  ///
  /// - Returns: `self`. The result may be discarded by callers that only want
  ///   the check.
  /// - Throws: The error raised by `diags.throwAnyError()` when the AST is
  ///   invalid.
  @discardableResult
  public func ensureValid() throws -> AST {
    try diags.throwAnyError()
    return self
  }
}

extension AST {
Expand Down Expand Up @@ -265,33 +280,46 @@ extension AST {
public enum Kind: Hashable {
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
// Oniguruma: \k<n>, \k'n'
case absolute(Int)
case absolute(AST.Atom.Number)

// \g{-n} \g<+n> \g'+n' \g<-n> \g'-n' (?+n) (?-n)
// (?(+n)... (?(-n)...
// Oniguruma: \k<-n> \k<+n> \k'-n' \k'+n'
case relative(Int)
case relative(AST.Atom.Number)

// \k<name> \k'name' \g{name} \k{name} (?P=name)
// \g<name> \g'name' (?&name) (?P>name)
// (?(<name>)... (?('name')... (?(name)...
case named(String)

/// (?R), (?(R)..., which are equivalent to (?0), (?(0)...
static var recurseWholePattern: Kind { .absolute(0) }
/// Builds the kind for `(?R)` / `(?(R)...`, which are equivalent to `(?0)` /
/// `(?(0)...`: an absolute reference to 0 whose number is located at `loc`.
static func recurseWholePattern(_ loc: SourceLocation) -> Kind {
  let wholePattern = AST.Atom.Number(0, at: loc)
  return .absolute(wholePattern)
}

/// `true` for a reference that recurses the whole pattern instead of a
/// group, i.e. an absolute reference whose number has the value 0.
public var recursesWholePattern: Bool {
  // Only absolute references can target the whole pattern.
  guard case .absolute(let number) = self else { return false }
  return number.value == 0
}
}
public var kind: Kind

/// An additional specifier supported by Oniguruma that specifies what
/// recursion level the group being referenced belongs to.
public var recursionLevel: Located<Int>?
public var recursionLevel: AST.Atom.Number?

/// The location of the inner numeric or textual reference, e.g. the location
/// of '-2' in '\g{-2}'. Note that this includes the recursion level, e.g. in
/// '\k<a+2>'.
public var innerLoc: SourceLocation

public init(_ kind: Kind, recursionLevel: Located<Int>? = nil,
public init(_ kind: Kind, recursionLevel: AST.Atom.Number? = nil,
innerLoc: SourceLocation) {
self.kind = kind
self.recursionLevel = recursionLevel
Expand All @@ -300,7 +328,7 @@ extension AST {

/// Whether this is a reference that recurses the whole pattern, rather than
/// a group.
public var recursesWholePattern: Bool { kind == .recurseWholePattern }
public var recursesWholePattern: Bool { kind.recursesWholePattern }
}

/// A set of global matching options in a regular expression literal.
Expand Down
29 changes: 26 additions & 3 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ extension AST {

// (?i), (?i-m), ...
case changeMatchingOptions(MatchingOptionSequence)

// An invalid atom created by a parse error.
case invalid
}
}
}
Expand All @@ -104,6 +107,7 @@ extension AST.Atom {
case .any: return nil
case .startOfLine: return nil
case .endOfLine: return nil
case .invalid: return nil
}
}

Expand All @@ -113,6 +117,18 @@ extension AST.Atom {
}

extension AST.Atom {
public struct Number: Hashable {
  /// The parsed integer. In an invalid AST this may be `nil`, e.g. when the
  /// parser expected a number at a given location, or when the parsed number
  /// overflowed.
  public var value: Int?

  /// The source location associated with this number.
  public var location: SourceLocation

  /// Creates a number with an optional `value` at the given `location`.
  public init(_ value: Int?, at location: SourceLocation) {
    self.location = location
    self.value = value
  }
}

public struct Scalar: Hashable {
public var value: UnicodeScalar
public var location: SourceLocation
Expand Down Expand Up @@ -453,6 +469,9 @@ extension AST.Atom.CharacterProperty {
/// Some special properties implemented by Java.
case javaSpecial(JavaSpecial)

/// An invalid property that has been diagnosed by the parser.
case invalid(key: String?, value: String)

public enum MapKind: Hashable {
case lowercase
case uppercase
Expand Down Expand Up @@ -558,7 +577,7 @@ extension AST.Atom {
/// A PCRE callout written `(?C...)`
public struct PCRE: Hashable {
public enum Argument: Hashable {
case number(Int)
case number(AST.Atom.Number)
case string(String)
}
public var arg: AST.Located<Argument>
Expand Down Expand Up @@ -789,7 +808,7 @@ extension AST.Atom {

case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
.backreference, .subpattern, .callout, .backtrackingDirective,
.changeMatchingOptions:
.changeMatchingOptions, .invalid:
return nil
}
}
Expand All @@ -803,6 +822,10 @@ extension AST.Atom {
// \cx, \C-x, \M-x, \M-\C-x, \N{...}
case .keyboardControl, .keyboardMeta, .keyboardMetaControl, .namedCharacter:
return true
case .scalarSequence:
// Unsupported for now (and we will diagnose as such), but treat it as a
// valid range operand for better recovery.
return true
default:
return false
}
Expand Down Expand Up @@ -837,7 +860,7 @@ extension AST.Atom {

case .property, .escaped, .any, .startOfLine, .endOfLine,
.backreference, .subpattern, .namedCharacter, .callout,
.backtrackingDirective, .changeMatchingOptions:
.backtrackingDirective, .changeMatchingOptions, .invalid:
return nil
}
}
Expand Down
8 changes: 5 additions & 3 deletions Sources/_RegexParser/Regex/AST/Conditional.swift
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ extension AST.Conditional {

extension AST.Conditional.Condition {
public struct PCREVersionNumber: Hashable {
public var major: Int
public var minor: Int
public var major: AST.Atom.Number
public var minor: AST.Atom.Number
public var location: SourceLocation

public init(major: Int, minor: Int, _ location: SourceLocation) {
public init(
major: AST.Atom.Number, minor: AST.Atom.Number, _ location: SourceLocation
) {
self.major = major
self.minor = minor
self.location = location
Expand Down
6 changes: 3 additions & 3 deletions Sources/_RegexParser/Regex/AST/MatchingOptions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,13 @@ extension AST {
}
public enum Kind: Hashable {
/// (*LIMIT_DEPTH=d)
case limitDepth(Located<Int>)
case limitDepth(AST.Atom.Number)

/// (*LIMIT_HEAP=d)
case limitHeap(Located<Int>)
case limitHeap(AST.Atom.Number)

/// (*LIMIT_MATCH=d)
case limitMatch(Located<Int>)
case limitMatch(AST.Atom.Number)

/// (*NOTEMPTY)
case notEmpty
Expand Down
16 changes: 8 additions & 8 deletions Sources/_RegexParser/Regex/AST/Quantification.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ extension AST {
}

public enum Amount: Hashable {
case zeroOrMore // *
case oneOrMore // +
case zeroOrOne // ?
case exactly(Located<Int>) // {n}
case nOrMore(Located<Int>) // {n,}
case upToN(Located<Int>) // {,n}
case range(Located<Int>, Located<Int>) // {n,m}
case zeroOrMore // *
case oneOrMore // +
case zeroOrOne // ?
case exactly(AST.Atom.Number) // {n}
case nOrMore(AST.Atom.Number) // {n,}
case upToN(AST.Atom.Number) // {,n}
case range(AST.Atom.Number, AST.Atom.Number) // {n,m}
}

public enum Kind: String, Hashable {
Expand All @@ -58,7 +58,7 @@ extension AST {

extension AST.Quantification.Amount {
/// The bounds.
public var bounds: (atLeast: Int, atMost: Int?) {
public var bounds: (atLeast: Int?, atMost: Int?) {
switch self {
case .zeroOrMore: return (0, nil)
case .oneOrMore: return (1, nil)
Expand Down
Loading