swiftlang
diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift
Lines changed: 13 additions & 2 deletions
diff --git a/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift b/Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift
Lines changed: 64 additions & 39 deletions
@@ -80,6 +80,9 @@ extension AST {
 
       // (?i), (?i-m), ...
       case changeMatchingOptions(MatchingOptionSequence)
+
+      // An invalid atom created by a parse error.
+      case invalid
     }
   }
 }
@@ -104,6 +107,7 @@ extension AST.Atom {
     case .any:                          return nil
     case .startOfLine:                  return nil
     case .endOfLine:                    return nil
+    case .invalid:                      return nil
     }
   }
 
@@ -465,6 +469,9 @@ extension AST.Atom.CharacterProperty {
     /// Some special properties implemented by Java.
     case javaSpecial(JavaSpecial)
 
+    /// An invalid property that has been diagnosed by the parser.
+    case invalid(key: String?, value: String)
+
     public enum MapKind: Hashable {
       case lowercase
       case uppercase
@@ -801,7 +808,7 @@ extension AST.Atom {
 
     case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
         .backreference, .subpattern, .callout, .backtrackingDirective,
-        .changeMatchingOptions:
+        .changeMatchingOptions, .invalid:
       return nil
     }
   }
@@ -815,6 +822,10 @@ extension AST.Atom {
     // \cx, \C-x, \M-x, \M-\C-x, \N{...}
     case .keyboardControl, .keyboardMeta, .keyboardMetaControl, .namedCharacter:
       return true
+    case .scalarSequence:
+      // Unsupported for now (and we will diagnose as such), but treat it as a
+      // valid range operand for better recovery.
+      return true
     default:
       return false
     }
@@ -849,7 +860,7 @@ extension AST.Atom {
 
     case .property, .escaped, .any, .startOfLine, .endOfLine,
         .backreference, .subpattern, .namedCharacter, .callout,
-        .backtrackingDirective, .changeMatchingOptions:
+        .backtrackingDirective, .changeMatchingOptions, .invalid:
       return nil
     }
   }
 
@@ -9,25 +9,25 @@
 //
 //===----------------------------------------------------------------------===//
 
-extension Source {
+extension Parser {
   typealias PropertyKind = AST.Atom.CharacterProperty.Kind
 
   static private func withNormalizedForms<T>(
-    _ str: String, requireInPrefix: Bool = false, match: (String) throws -> T?
-  ) rethrows -> T? {
+    _ str: String, requireInPrefix: Bool = false, match: (String) -> T?
+  ) -> T? {
     // This follows the rules provided by UAX44-LM3, including trying to drop an
     // "is" prefix, which isn't required by UTS#18 RL1.2, but is nice for
     // consistency with other engines and the Unicode.Scalar.Properties names.
     let str = str.filter { !$0.isPatternWhitespace && $0 != "_" && $0 != "-" }
                  .lowercased()
     if requireInPrefix {
       guard str.hasPrefix("in") else { return nil }
-      return try match(String(str.dropFirst(2)))
+      return match(String(str.dropFirst(2)))
     }
-    if let m = try match(str) {
+    if let m = match(str) {
       return m
     }
-    if str.hasPrefix("is"), let m = try match(String(str.dropFirst(2))) {
+    if str.hasPrefix("is"), let m = match(String(str.dropFirst(2))) {
       return m
     }
     return nil
@@ -736,31 +736,40 @@ extension Source {
     return (major, minor)
   }
 
-  static func classifyCharacterPropertyValueOnly(
-    _ value: String
-  ) throws -> PropertyKind {
-    guard !value.isEmpty else { throw ParseError.emptyProperty }
+  mutating func classifyCharacterPropertyValueOnly(
+    _ valueLoc: Located<String>
+  ) -> PropertyKind {
+    let value = valueLoc.value
+
+    func error(_ err: ParseError) -> PropertyKind {
+      self.error(err, at: valueLoc.location)
+      return .invalid(key: nil, value: value)
+    }
+
+    guard !value.isEmpty else {
+      return error(.emptyProperty)
+    }
 
     // Some special cases defined by UTS#18 (and Oniguruma for 'ANY' and
     // 'Assigned').
-    if let specialProp = classifySpecialPropValue(value) {
+    if let specialProp = Self.classifySpecialPropValue(value) {
       return specialProp
     }
 
     // The following properties we can infer keys/values for.
-    if let prop = classifyBoolProperty(value) {
+    if let prop = Self.classifyBoolProperty(value) {
       return .binary(prop, value: true)
     }
-    if let cat = classifyGeneralCategory(value) {
+    if let cat = Self.classifyGeneralCategory(value) {
       return .generalCategory(cat)
     }
-    if let script = classifyScriptProperty(value) {
+    if let script = Self.classifyScriptProperty(value) {
       return .scriptExtension(script)
     }
-    if let posix = classifyPOSIX(value) {
+    if let posix = Self.classifyPOSIX(value) {
       return .posix(posix)
     }
-    if let block = classifyBlockProperty(value, valueOnly: true) {
+    if let block = Self.classifyBlockProperty(value, valueOnly: true) {
       return .block(block)
     }
 
@@ -776,53 +785,67 @@ extension Source {
 
     // TODO: This should be versioned, and do we want a more lax behavior for
     // the runtime?
-    throw ParseError.unknownProperty(key: nil, value: value)
+    return error(.unknownProperty(key: nil, value: value))
   }
 
-  static func classifyCharacterProperty(
-    key: String, value: String
-  ) throws -> PropertyKind {
-    guard !key.isEmpty && !value.isEmpty else { throw ParseError.emptyProperty }
+  mutating func classifyCharacterProperty(
+    key keyLoc: Located<String>, value valueLoc: Located<String>
+  ) -> PropertyKind {
+    let key = keyLoc.value
+    let value = valueLoc.value
+
+    func valueError(_ err: ParseError) -> PropertyKind {
+      error(err, at: valueLoc.location)
+      return .invalid(key: key, value: value)
+    }
+
+    guard !key.isEmpty else {
+      error(.emptyProperty, at: keyLoc.location)
+      return .invalid(key: key, value: value)
+    }
+    guard !value.isEmpty else {
+      return valueError(.emptyProperty)
+    }
 
-    if let prop = classifyBoolProperty(key),
-       let isTrue = classifyCharacterPropertyBoolValue(value) {
+    if let prop = Self.classifyBoolProperty(key),
+       let isTrue = Self.classifyCharacterPropertyBoolValue(value) {
       return .binary(prop, value: isTrue)
     }
 
     // This uses the aliases defined in
     // https://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt.
-    let match = try withNormalizedForms(key) { normalizedKey -> PropertyKind? in
+    let match = Self.withNormalizedForms(key) { normalizedKey -> PropertyKind? in
       switch normalizedKey {
       case "script", "sc":
-        guard let script = classifyScriptProperty(value) else {
-          throw ParseError.unrecognizedScript(value)
+        guard let script = Self.classifyScriptProperty(value) else {
+          return valueError(.unrecognizedScript(value))
         }
         return .script(script)
       case "scriptextensions", "scx":
-        guard let script = classifyScriptProperty(value) else {
-          throw ParseError.unrecognizedScript(value)
+        guard let script = Self.classifyScriptProperty(value) else {
+          return valueError(.unrecognizedScript(value))
         }
         return .scriptExtension(script)
       case "gc", "generalcategory":
-        guard let cat = classifyGeneralCategory(value) else {
-          throw ParseError.unrecognizedCategory(value)
+        guard let cat = Self.classifyGeneralCategory(value) else {
+          return valueError(.unrecognizedCategory(value))
         }
         return .generalCategory(cat)
       case "age":
-        guard let (major, minor) = parseAge(value) else {
-          throw ParseError.invalidAge(value)
+        guard let (major, minor) = Self.parseAge(value) else {
+          return valueError(.invalidAge(value))
         }
         return .age(major: major, minor: minor)
       case "name", "na":
         return .named(value)
       case "numericvalue", "nv":
         guard let numericValue = Double(value) else {
-          throw ParseError.invalidNumericValue(value)
+          return valueError(.invalidNumericValue(value))
         }
         return .numericValue(numericValue)
       case "numerictype", "nt":
-        guard let type = classifyNumericType(value) else {
-          throw ParseError.unrecognizedNumericType(value)
+        guard let type = Self.classifyNumericType(value) else {
+          return valueError(.unrecognizedNumericType(value))
         }
         return .numericType(type)
       case "slc", "simplelowercasemapping":
@@ -833,13 +856,13 @@ extension Source {
         return .mapping(.titlecase, value)
       case "ccc", "canonicalcombiningclass":
         guard let cccValue = UInt8(value), cccValue <= 254 else {
-          throw ParseError.invalidCCC(value)
+          return valueError(.invalidCCC(value))
         }
         return .ccc(.init(rawValue: cccValue))
 
       case "blk", "block":
-        guard let block = classifyBlockProperty(value, valueOnly: false) else {
-          throw ParseError.unrecognizedBlock(value)
+        guard let block = Self.classifyBlockProperty(value, valueOnly: false) else {
+          return valueError(.unrecognizedBlock(value))
         }
         return .block(block)
       default:
@@ -852,6 +875,8 @@ extension Source {
     }
     // TODO: This should be versioned, and do we want a more lax behavior for
     // the runtime?
-    throw ParseError.unknownProperty(key: key, value: value)
+    error(.unknownProperty(key: key, value: value),
+          at: keyLoc.location.union(with: valueLoc.location))
+    return .invalid(key: key, value: value)
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -80,6 +80,9 @@ extension AST {`
`80`	`80`
`81`	`81`	`// (?i), (?i-m), ...`
`82`	`82`	`case changeMatchingOptions(MatchingOptionSequence)`
	`83`	`+`
	`84`	`+ // An invalid atom created by a parse error.`
	`85`	`+ case invalid`
`83`	`86`	`}`
`84`	`87`	`}`
`85`	`88`	`}`
`@@ -104,6 +107,7 @@ extension AST.Atom {`
`104`	`107`	`case .any: return nil`
`105`	`108`	`case .startOfLine: return nil`
`106`	`109`	`case .endOfLine: return nil`
	`110`	`+ case .invalid: return nil`
`107`	`111`	`}`
`108`	`112`	`}`
`109`	`113`
`@@ -465,6 +469,9 @@ extension AST.Atom.CharacterProperty {`
`465`	`469`	`/// Some special properties implemented by Java.`
`466`	`470`	`case javaSpecial(JavaSpecial)`
`467`	`471`
	`472`	`+ /// An invalid property that has been diagnosed by the parser.`
	`473`	`+ case invalid(key: String?, value: String)`
	`474`	`+`
`468`	`475`	`public enum MapKind: Hashable {`
`469`	`476`	`case lowercase`
`470`	`477`	`case uppercase`
`@@ -801,7 +808,7 @@ extension AST.Atom {`
`801`	`808`
`802`	`809`	`case .scalarSequence, .property, .any, .startOfLine, .endOfLine,`
`803`	`810`	`.backreference, .subpattern, .callout, .backtrackingDirective,`
`804`		`- .changeMatchingOptions:`
	`811`	`+ .changeMatchingOptions, .invalid:`
`805`	`812`	`return nil`
`806`	`813`	`}`
`807`	`814`	`}`
`@@ -815,6 +822,10 @@ extension AST.Atom {`
`815`	`822`	`// \cx, \C-x, \M-x, \M-\C-x, \N{...}`
`816`	`823`	`case .keyboardControl, .keyboardMeta, .keyboardMetaControl, .namedCharacter:`
`817`	`824`	`return true`
	`825`	`+ case .scalarSequence:`
	`826`	`+ // Unsupported for now (and we will diagnose as such), but treat it as a`
	`827`	`+ // valid range operand for better recovery.`
	`828`	`+ return true`
`818`	`829`	`default:`
`819`	`830`	`return false`
`820`	`831`	`}`
`@@ -849,7 +860,7 @@ extension AST.Atom {`
`849`	`860`
`850`	`861`	`case .property, .escaped, .any, .startOfLine, .endOfLine,`
`851`	`862`	`.backreference, .subpattern, .namedCharacter, .callout,`
`852`		`- .backtrackingDirective, .changeMatchingOptions:`
	`863`	`+ .backtrackingDirective, .changeMatchingOptions, .invalid:`
`853`	`864`	`return nil`
`854`	`865`	`}`
`855`	`866`	`}`