Skip to content

Commit aae597b

Browse files
committed
Improve recovery for identifiers and text segment options
Scan to the closing delimiter of an invalid identifier, and better diagnose an invalid text segment option.
1 parent ea48956 commit aae597b

File tree

3 files changed

+32
-19
lines changed

3 files changed

+32
-19
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ enum ParseError: Error, Hashable {
5555

5656
case unknownGroupKind(String)
5757
case unknownCalloutKind(String)
58+
case unknownTextSegmentMatchingOption(Character)
5859

5960
case invalidMatchingOption(Character)
6061
case cannotRemoveMatchingOptionsAfterCaret
@@ -165,6 +166,8 @@ extension ParseError: CustomStringConvertible {
165166
return "unknown group kind '(\(str)'"
166167
case let .unknownCalloutKind(str):
167168
return "unknown callout kind '\(str)'"
169+
case let .unknownTextSegmentMatchingOption(m):
170+
return "unknown text segment mode '\(m)'; expected 'w' or 'g'"
168171
case let .invalidMatchingOption(c):
169172
return "invalid matching option '\(c)'"
170173
case .cannotRemoveMatchingOptionsAfterCaret:

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -827,18 +827,28 @@ extension Parser {
827827
case "S": return .asciiOnlySpace
828828
case "W": return .asciiOnlyWord
829829
case "y":
830-
p.expect("{")
830+
// Default to grapheme cluster if unknown.
831+
let recoveryMode = OptKind.textSegmentGraphemeMode
832+
guard p.expect("{") else { return recoveryMode }
833+
834+
guard let optChar = p.tryEatWithLoc(), optChar.value != "}" else {
835+
p.errorAtCurrentPosition(.expected("text segment mode"))
836+
return recoveryMode
837+
}
831838
let opt: OptKind
832-
if p.tryEat("w") {
839+
switch optChar.value {
840+
case "w":
833841
opt = .textSegmentWordMode
834-
} else {
835-
p.expect("g")
842+
case "g":
836843
opt = .textSegmentGraphemeMode
844+
case let x:
845+
p.error(.unknownTextSegmentMatchingOption(x), at: optChar.location)
846+
opt = recoveryMode
837847
}
838848
p.expect("}")
839849
return opt
840850

841-
// Swift semantic level options
851+
// Swift semantic level options
842852
case "X": return .graphemeClusterSemantics
843853
case "u": return .unicodeScalarSemantics
844854
case "b": return .byteSemantics
@@ -973,6 +983,8 @@ extension Parser {
973983
}
974984
guard let str = p.tryEatPrefix(\.isWordCharacter) else {
975985
p.error(.identifierMustBeAlphaNumeric(kind), at: firstChar.location)
986+
// Try skip ahead to the closing delimiter for better recovery.
987+
_ = p.lexUntil { $0.src.isEmpty || $0.src.starts(with: ending) }
976988
return ""
977989
}
978990
return str.value

Tests/RegexTests/ParseTests.swift

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2775,11 +2775,10 @@ extension RegexTests {
27752775
diagnosticTest(#"(?^"#, .expected(")"))
27762776
diagnosticTest(#"(?^i"#, .expected(")"))
27772777

2778-
// TODO: These errors could be better.
2779-
diagnosticTest(#"(?y)"#, .expected("{"), .expected("g"), .expected("}"), unsupported: true)
2780-
diagnosticTest(#"(?y{)"#, .expected("g"), .expected("}"), unsupported: true)
2778+
diagnosticTest(#"(?y)"#, .expected("{"), unsupported: true)
2779+
diagnosticTest(#"(?y{)"#, .unknownTextSegmentMatchingOption(")"), .expected("}"), .expected(")"), unsupported: true)
27812780
diagnosticTest(#"(?y{g)"#, .expected("}"), unsupported: true)
2782-
diagnosticTest(#"(?y{x})"#, .expected("g"), .expected("}"), .invalidMatchingOption("}"), unsupported: true)
2781+
diagnosticTest(#"(?y{x})"#, .unknownTextSegmentMatchingOption("x"), unsupported: true)
27832782

27842783
diagnosticTest(#"(?P"#, .expected(")"))
27852784
diagnosticTest(#"(?R"#, .expected(")"), unsupported: true)
@@ -3022,18 +3021,17 @@ extension RegexTests {
30223021
diagnosticTest(#"(?k)"#, .unknownGroupKind("?k"))
30233022
diagnosticTest(#"(?P#)"#, .invalidMatchingOption("#"))
30243023

3025-
// TODO: We shouldn't emit the expected closing delimiter here and elsewhere.
3026-
diagnosticTest(#"(?<#>)"#, .expected(">"), .identifierMustBeAlphaNumeric(.groupName))
3024+
diagnosticTest(#"(?<#>)"#, .identifierMustBeAlphaNumeric(.groupName))
30273025
diagnosticTest(#"(?'1A')"#, .identifierCannotStartWithNumber(.groupName))
30283026

30293027
// TODO: It might be better if tried to consume up to the closing `'` and
30303028
// diagnosed an invalid group name based on that.
30313029
diagnosticTest(#"(?'abc ')"#, .expected("'"))
30323030

3033-
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName), .expected("'"))
3031+
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName))
30343032

30353033
diagnosticTest(#"(?'-')"#, .expectedIdentifier(.groupName), unsupported: true)
3036-
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
3034+
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
30373035
diagnosticTest(#"(?'a-b-c')"#, .expected("'"), unsupported: true)
30383036

30393037
diagnosticTest("(?x)(? : )", .unknownGroupKind("? "))
@@ -3114,12 +3112,12 @@ extension RegexTests {
31143112
diagnosticTest(#"\g{0}"#, .cannotReferToWholePattern)
31153113
diagnosticTest(#"(?(0))"#, .cannotReferToWholePattern, unsupported: true)
31163114

3117-
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3118-
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3119-
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3120-
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3121-
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
3122-
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3115+
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3116+
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3117+
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3118+
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3119+
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3120+
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
31233121

31243122
diagnosticTest(#"(?P>1)"#, .identifierCannotStartWithNumber(.groupName), unsupported: true)
31253123
diagnosticTest(#"\k{1}"#, .identifierCannotStartWithNumber(.groupName), .invalidNamedReference("1"))

0 commit comments

Comments
 (0)