Skip to content

Commit 0a88a36

Browse files
authored
Merge pull request #519 from hamishknight/totally-5.7
[5.7] Recover from parser errors
2 parents 733c96f + c1c4e61 commit 0a88a36

29 files changed

+2303
-1849
lines changed

Sources/PatternConverter/PatternConverter.swift

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ struct PatternConverter: ParsableCommand {
5050
print("Converting '\(delim)\(regex)\(delim)'")
5151

5252
let ast = try _RegexParser.parse(
53-
regex, .semantic,
54-
experimentalSyntax ? .experimental : .traditional)
53+
regex, experimentalSyntax ? .experimental : .traditional)
5554

5655
// Show rendered source ranges
5756
if renderSourceRanges {

Sources/_RegexParser/Regex/AST/AST.swift

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,31 @@
1515
public struct AST: Hashable {
1616
public var root: AST.Node
1717
public var globalOptions: GlobalMatchingOptionSequence?
18+
public var diags: Diagnostics
1819

19-
public init(_ root: AST.Node, globalOptions: GlobalMatchingOptionSequence?) {
20+
public init(
21+
_ root: AST.Node, globalOptions: GlobalMatchingOptionSequence?,
22+
diags: Diagnostics
23+
) {
2024
self.root = root
2125
self.globalOptions = globalOptions
26+
self.diags = diags
2227
}
2328
}
2429

2530
extension AST {
2631
/// Whether this AST tree contains at least one capture nested inside of it.
2732
public var hasCapture: Bool { root.hasCapture }
33+
34+
/// Whether this AST tree is either syntactically or semantically invalid.
35+
public var isInvalid: Bool { diags.hasAnyError }
36+
37+
/// If the AST is invalid, throws an error. Otherwise, returns self.
38+
@discardableResult
39+
public func ensureValid() throws -> AST {
40+
try diags.throwAnyError()
41+
return self
42+
}
2843
}
2944

3045
extension AST {
@@ -265,33 +280,46 @@ extension AST {
265280
public enum Kind: Hashable {
266281
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
267282
// Oniguruma: \k<n>, \k'n'
268-
case absolute(Int)
283+
case absolute(AST.Atom.Number)
269284

270285
// \g{-n} \g<+n> \g'+n' \g<-n> \g'-n' (?+n) (?-n)
271286
// (?(+n)... (?(-n)...
272287
// Oniguruma: \k<-n> \k<+n> \k'-n' \k'+n'
273-
case relative(Int)
288+
case relative(AST.Atom.Number)
274289

275290
// \k<name> \k'name' \g{name} \k{name} (?P=name)
276291
// \g<name> \g'name' (?&name) (?P>name)
277292
// (?(<name>)... (?('name')... (?(name)...
278293
case named(String)
279294

280295
/// (?R), (?(R)..., which are equivalent to (?0), (?(0)...
281-
static var recurseWholePattern: Kind { .absolute(0) }
296+
static func recurseWholePattern(_ loc: SourceLocation) -> Kind {
297+
.absolute(.init(0, at: loc))
298+
}
299+
300+
/// Whether this is a reference that recurses the whole pattern, rather
301+
/// than a group.
302+
public var recursesWholePattern: Bool {
303+
switch self {
304+
case .absolute(let a):
305+
return a.value == 0
306+
default:
307+
return false
308+
}
309+
}
282310
}
283311
public var kind: Kind
284312

285313
/// An additional specifier supported by Oniguruma that specifies what
286314
/// recursion level the group being referenced belongs to.
287-
public var recursionLevel: Located<Int>?
315+
public var recursionLevel: AST.Atom.Number?
288316

289317
/// The location of the inner numeric or textual reference, e.g. the location
290318
/// of '-2' in '\g{-2}'. Note this includes the recursion level for e.g.
291319
/// '\k<a+2>'.
292320
public var innerLoc: SourceLocation
293321

294-
public init(_ kind: Kind, recursionLevel: Located<Int>? = nil,
322+
public init(_ kind: Kind, recursionLevel: AST.Atom.Number? = nil,
295323
innerLoc: SourceLocation) {
296324
self.kind = kind
297325
self.recursionLevel = recursionLevel
@@ -300,7 +328,7 @@ extension AST {
300328

301329
/// Whether this is a reference that recurses the whole pattern, rather than
302330
/// a group.
303-
public var recursesWholePattern: Bool { kind == .recurseWholePattern }
331+
public var recursesWholePattern: Bool { kind.recursesWholePattern }
304332
}
305333

306334
/// A set of global matching options in a regular expression literal.

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ extension AST {
8080

8181
// (?i), (?i-m), ...
8282
case changeMatchingOptions(MatchingOptionSequence)
83+
84+
// An invalid atom created by a parse error.
85+
case invalid
8386
}
8487
}
8588
}
@@ -104,6 +107,7 @@ extension AST.Atom {
104107
case .any: return nil
105108
case .startOfLine: return nil
106109
case .endOfLine: return nil
110+
case .invalid: return nil
107111
}
108112
}
109113

@@ -113,6 +117,18 @@ extension AST.Atom {
113117
}
114118

115119
extension AST.Atom {
120+
public struct Number: Hashable {
121+
/// The value, which may be `nil` in an invalid AST, e.g. the parser expected
122+
/// a number at a given location, or the parsed number overflowed.
123+
public var value: Int?
124+
public var location: SourceLocation
125+
126+
public init(_ value: Int?, at location: SourceLocation) {
127+
self.value = value
128+
self.location = location
129+
}
130+
}
131+
116132
public struct Scalar: Hashable {
117133
public var value: UnicodeScalar
118134
public var location: SourceLocation
@@ -453,6 +469,9 @@ extension AST.Atom.CharacterProperty {
453469
/// Some special properties implemented by Java.
454470
case javaSpecial(JavaSpecial)
455471

472+
/// An invalid property that has been diagnosed by the parser.
473+
case invalid(key: String?, value: String)
474+
456475
public enum MapKind: Hashable {
457476
case lowercase
458477
case uppercase
@@ -558,7 +577,7 @@ extension AST.Atom {
558577
/// A PCRE callout written `(?C...)`
559578
public struct PCRE: Hashable {
560579
public enum Argument: Hashable {
561-
case number(Int)
580+
case number(AST.Atom.Number)
562581
case string(String)
563582
}
564583
public var arg: AST.Located<Argument>
@@ -789,7 +808,7 @@ extension AST.Atom {
789808

790809
case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
791810
.backreference, .subpattern, .callout, .backtrackingDirective,
792-
.changeMatchingOptions:
811+
.changeMatchingOptions, .invalid:
793812
return nil
794813
}
795814
}
@@ -803,6 +822,10 @@ extension AST.Atom {
803822
// \cx, \C-x, \M-x, \M-\C-x, \N{...}
804823
case .keyboardControl, .keyboardMeta, .keyboardMetaControl, .namedCharacter:
805824
return true
825+
case .scalarSequence:
826+
// Unsupported for now (and we will diagnose as such), but treat it as a
827+
// valid range operand for better recovery.
828+
return true
806829
default:
807830
return false
808831
}
@@ -837,7 +860,7 @@ extension AST.Atom {
837860

838861
case .property, .escaped, .any, .startOfLine, .endOfLine,
839862
.backreference, .subpattern, .namedCharacter, .callout,
840-
.backtrackingDirective, .changeMatchingOptions:
863+
.backtrackingDirective, .changeMatchingOptions, .invalid:
841864
return nil
842865
}
843866
}

Sources/_RegexParser/Regex/AST/Conditional.swift

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,13 @@ extension AST.Conditional {
6666

6767
extension AST.Conditional.Condition {
6868
public struct PCREVersionNumber: Hashable {
69-
public var major: Int
70-
public var minor: Int
69+
public var major: AST.Atom.Number
70+
public var minor: AST.Atom.Number
7171
public var location: SourceLocation
7272

73-
public init(major: Int, minor: Int, _ location: SourceLocation) {
73+
public init(
74+
major: AST.Atom.Number, minor: AST.Atom.Number, _ location: SourceLocation
75+
) {
7476
self.major = major
7577
self.minor = minor
7678
self.location = location

Sources/_RegexParser/Regex/AST/MatchingOptions.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,13 @@ extension AST {
175175
}
176176
public enum Kind: Hashable {
177177
/// (*LIMIT_DEPTH=d)
178-
case limitDepth(Located<Int>)
178+
case limitDepth(AST.Atom.Number)
179179

180180
/// (*LIMIT_HEAP=d)
181-
case limitHeap(Located<Int>)
181+
case limitHeap(AST.Atom.Number)
182182

183183
/// (*LIMIT_MATCH=d)
184-
case limitMatch(Located<Int>)
184+
case limitMatch(AST.Atom.Number)
185185

186186
/// (*NOTEMPTY)
187187
case notEmpty

Sources/_RegexParser/Regex/AST/Quantification.swift

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ extension AST {
3737
}
3838

3939
public enum Amount: Hashable {
40-
case zeroOrMore // *
41-
case oneOrMore // +
42-
case zeroOrOne // ?
43-
case exactly(Located<Int>) // {n}
44-
case nOrMore(Located<Int>) // {n,}
45-
case upToN(Located<Int>) // {,n}
46-
case range(Located<Int>, Located<Int>) // {n,m}
40+
case zeroOrMore // *
41+
case oneOrMore // +
42+
case zeroOrOne // ?
43+
case exactly(AST.Atom.Number) // {n}
44+
case nOrMore(AST.Atom.Number) // {n,}
45+
case upToN(AST.Atom.Number) // {,n}
46+
case range(AST.Atom.Number, AST.Atom.Number) // {n,m}
4747
}
4848

4949
public enum Kind: String, Hashable {
@@ -58,7 +58,7 @@ extension AST {
5858

5959
extension AST.Quantification.Amount {
6060
/// The bounds.
61-
public var bounds: (atLeast: Int, atMost: Int?) {
61+
public var bounds: (atLeast: Int?, atMost: Int?) {
6262
switch self {
6363
case .zeroOrMore: return (0, nil)
6464
case .oneOrMore: return (1, nil)

0 commit comments

Comments
 (0)