Skip to content

Commit 228c71c

Browse files
committed
Improve recovery for identifiers and text segment options
Scan to the closing delimiter of an invalid identifier, and better diagnose an invalid text segment option.
1 parent 780ba3c commit 228c71c

File tree

3 files changed

+32
-19
lines changed

3 files changed

+32
-19
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ enum ParseError: Error, Hashable {
5454

5555
case unknownGroupKind(String)
5656
case unknownCalloutKind(String)
57+
case unknownTextSegmentMatchingOption(Character)
5758

5859
case invalidMatchingOption(Character)
5960
case cannotRemoveMatchingOptionsAfterCaret
@@ -161,6 +162,8 @@ extension ParseError: CustomStringConvertible {
161162
return "unknown group kind '(\(str)'"
162163
case let .unknownCalloutKind(str):
163164
return "unknown callout kind '\(str)'"
165+
case let .unknownTextSegmentMatchingOption(m):
166+
return "unknown text segment mode '\(m)'; expected 'w' or 'g'"
164167
case let .invalidMatchingOption(c):
165168
return "invalid matching option '\(c)'"
166169
case .cannotRemoveMatchingOptionsAfterCaret:

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -830,18 +830,28 @@ extension Parser {
830830
case "S": return .asciiOnlySpace
831831
case "W": return .asciiOnlyWord
832832
case "y":
833-
p.expect("{")
833+
// Default to grapheme cluster if unknown.
834+
let recoveryMode = OptKind.textSegmentGraphemeMode
835+
guard p.expect("{") else { return recoveryMode }
836+
837+
guard let optChar = p.tryEatWithLoc(), optChar.value != "}" else {
838+
p.errorAtCurrentPosition(.expected("text segment mode"))
839+
return recoveryMode
840+
}
834841
let opt: OptKind
835-
if p.tryEat("w") {
842+
switch optChar.value {
843+
case "w":
836844
opt = .textSegmentWordMode
837-
} else {
838-
p.expect("g")
845+
case "g":
839846
opt = .textSegmentGraphemeMode
847+
case let x:
848+
p.error(.unknownTextSegmentMatchingOption(x), at: optChar.location)
849+
opt = recoveryMode
840850
}
841851
p.expect("}")
842852
return opt
843853

844-
// Swift semantic level options
854+
// Swift semantic level options
845855
case "X": return .graphemeClusterSemantics
846856
case "u": return .unicodeScalarSemantics
847857
case "b": return .byteSemantics
@@ -981,6 +991,8 @@ extension Parser {
981991
}
982992
guard let str = p.tryEatPrefix(\.isWordCharacter) else {
983993
p.error(.identifierMustBeAlphaNumeric(kind), at: firstChar.location)
994+
// Try skip ahead to the closing delimiter for better recovery.
995+
_ = p.lexUntil { $0.src.isEmpty || $0.src.starts(with: ending) }
984996
return ""
985997
}
986998
return str.value

Tests/RegexTests/ParseTests.swift

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2736,11 +2736,10 @@ extension RegexTests {
27362736
diagnosticTest(#"(?^"#, .expected(")"))
27372737
diagnosticTest(#"(?^i"#, .expected(")"))
27382738

2739-
// TODO: These errors could be better.
2740-
diagnosticTest(#"(?y)"#, .expected("{"), .expected("g"), .expected("}"), unsupported: true)
2741-
diagnosticTest(#"(?y{)"#, .expected("g"), .expected("}"), unsupported: true)
2739+
diagnosticTest(#"(?y)"#, .expected("{"), unsupported: true)
2740+
diagnosticTest(#"(?y{)"#, .unknownTextSegmentMatchingOption(")"), .expected("}"), .expected(")"), unsupported: true)
27422741
diagnosticTest(#"(?y{g)"#, .expected("}"), unsupported: true)
2743-
diagnosticTest(#"(?y{x})"#, .expected("g"), .expected("}"), .invalidMatchingOption("}"), unsupported: true)
2742+
diagnosticTest(#"(?y{x})"#, .unknownTextSegmentMatchingOption("x"), unsupported: true)
27442743

27452744
diagnosticTest(#"(?P"#, .expected(")"))
27462745
diagnosticTest(#"(?R"#, .expected(")"), unsupported: true)
@@ -2948,18 +2947,17 @@ extension RegexTests {
29482947
diagnosticTest(#"(?k)"#, .unknownGroupKind("?k"))
29492948
diagnosticTest(#"(?P#)"#, .invalidMatchingOption("#"))
29502949

2951-
// TODO: We shouldn't emit the expected closing delimiter here and elsewhere.
2952-
diagnosticTest(#"(?<#>)"#, .expected(">"), .identifierMustBeAlphaNumeric(.groupName))
2950+
diagnosticTest(#"(?<#>)"#, .identifierMustBeAlphaNumeric(.groupName))
29532951
diagnosticTest(#"(?'1A')"#, .identifierCannotStartWithNumber(.groupName))
29542952

29552953
// TODO: It might be better if tried to consume up to the closing `'` and
29562954
// diagnosed an invalid group name based on that.
29572955
diagnosticTest(#"(?'abc ')"#, .expected("'"))
29582956

2959-
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName), .expected("'"))
2957+
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName))
29602958

29612959
diagnosticTest(#"(?'-')"#, .expectedIdentifier(.groupName), unsupported: true)
2962-
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
2960+
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
29632961
diagnosticTest(#"(?'a-b-c')"#, .expected("'"), unsupported: true)
29642962

29652963
diagnosticTest("(?x)(? : )", .unknownGroupKind("? "))
@@ -3035,12 +3033,12 @@ extension RegexTests {
30353033
diagnosticTest(#"\g{0}"#, .cannotReferToWholePattern)
30363034
diagnosticTest(#"(?(0))"#, .cannotReferToWholePattern, unsupported: true)
30373035

3038-
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3039-
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3040-
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3041-
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3042-
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
3043-
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3036+
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3037+
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3038+
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3039+
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3040+
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3041+
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
30443042

30453043
diagnosticTest(#"(?P>1)"#, .identifierCannotStartWithNumber(.groupName), unsupported: true)
30463044
diagnosticTest(#"\k{1}"#, .identifierCannotStartWithNumber(.groupName), .invalidNamedReference("1"))

0 commit comments

Comments
 (0)