Skip to content

Commit 5945e76

Browse files
committed
Improve recovery for identifiers and text segment options
Scan to the closing delimiter of an invalid identifier, and better diagnose an invalid text segment option.
1 parent d6201a4 commit 5945e76

File tree

3 files changed

+32
-19
lines changed

3 files changed

+32
-19
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ enum ParseError: Error, Hashable {
5252

5353
case unknownGroupKind(String)
5454
case unknownCalloutKind(String)
55+
case unknownTextSegmentMatchingOption(Character)
5556

5657
case invalidMatchingOption(Character)
5758
case cannotRemoveMatchingOptionsAfterCaret
@@ -157,6 +158,8 @@ extension ParseError: CustomStringConvertible {
157158
return "unknown group kind '(\(str)'"
158159
case let .unknownCalloutKind(str):
159160
return "unknown callout kind '\(str)'"
161+
case let .unknownTextSegmentMatchingOption(m):
162+
return "unknown text segment mode '\(m)'; expected 'w' or 'g'"
160163
case let .invalidMatchingOption(c):
161164
return "invalid matching option '\(c)'"
162165
case .cannotRemoveMatchingOptionsAfterCaret:

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -815,18 +815,28 @@ extension Parser {
815815
case "S": return .asciiOnlySpace
816816
case "W": return .asciiOnlyWord
817817
case "y":
818-
p.expect("{")
818+
// Default to grapheme cluster if unknown.
819+
let recoveryMode = OptKind.textSegmentGraphemeMode
820+
guard p.expect("{") else { return recoveryMode }
821+
822+
guard let optChar = p.tryEatWithLoc(), optChar.value != "}" else {
823+
p.errorAtCurrentPosition(.expected("text segment mode"))
824+
return recoveryMode
825+
}
819826
let opt: OptKind
820-
if p.tryEat("w") {
827+
switch optChar.value {
828+
case "w":
821829
opt = .textSegmentWordMode
822-
} else {
823-
p.expect("g")
830+
case "g":
824831
opt = .textSegmentGraphemeMode
832+
case let x:
833+
p.error(.unknownTextSegmentMatchingOption(x), at: optChar.location)
834+
opt = recoveryMode
825835
}
826836
p.expect("}")
827837
return opt
828838

829-
// Swift semantic level options
839+
// Swift semantic level options
830840
case "X": return .graphemeClusterSemantics
831841
case "u": return .unicodeScalarSemantics
832842
case "b": return .byteSemantics
@@ -966,6 +976,8 @@ extension Parser {
966976
}
967977
guard let str = p.tryEatPrefix(\.isWordCharacter) else {
968978
p.error(.identifierMustBeAlphaNumeric(kind), at: firstChar.location)
979+
// Try skip ahead to the closing delimiter for better recovery.
980+
_ = p.lexUntil { $0.src.isEmpty || $0.src.starts(with: ending) }
969981
return ""
970982
}
971983
return str.value

Tests/RegexTests/ParseTests.swift

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2728,11 +2728,10 @@ extension RegexTests {
27282728
diagnosticTest(#"(?^"#, .expected(")"))
27292729
diagnosticTest(#"(?^i"#, .expected(")"))
27302730

2731-
// TODO: These errors could be better.
2732-
diagnosticTest(#"(?y)"#, .expected("{"), .expected("g"), .expected("}"), unsupported: true)
2733-
diagnosticTest(#"(?y{)"#, .expected("g"), .expected("}"), unsupported: true)
2731+
diagnosticTest(#"(?y)"#, .expected("{"), unsupported: true)
2732+
diagnosticTest(#"(?y{)"#, .unknownTextSegmentMatchingOption(")"), .expected("}"), .expected(")"), unsupported: true)
27342733
diagnosticTest(#"(?y{g)"#, .expected("}"), unsupported: true)
2735-
diagnosticTest(#"(?y{x})"#, .expected("g"), .expected("}"), .invalidMatchingOption("}"), unsupported: true)
2734+
diagnosticTest(#"(?y{x})"#, .unknownTextSegmentMatchingOption("x"), unsupported: true)
27362735

27372736
diagnosticTest(#"(?P"#, .expected(")"))
27382737
diagnosticTest(#"(?R"#, .expected(")"), unsupported: true)
@@ -2919,18 +2918,17 @@ extension RegexTests {
29192918
diagnosticTest(#"(?k)"#, .unknownGroupKind("?k"))
29202919
diagnosticTest(#"(?P#)"#, .invalidMatchingOption("#"))
29212920

2922-
// TODO: We shouldn't emit the expected closing delimiter here and elsewhere.
2923-
diagnosticTest(#"(?<#>)"#, .expected(">"), .identifierMustBeAlphaNumeric(.groupName))
2921+
diagnosticTest(#"(?<#>)"#, .identifierMustBeAlphaNumeric(.groupName))
29242922
diagnosticTest(#"(?'1A')"#, .identifierCannotStartWithNumber(.groupName))
29252923

29262924
// TODO: It might be better if tried to consume up to the closing `'` and
29272925
// diagnosed an invalid group name based on that.
29282926
diagnosticTest(#"(?'abc ')"#, .expected("'"))
29292927

2930-
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName), .expected("'"))
2928+
diagnosticTest("(?'🔥')", .identifierMustBeAlphaNumeric(.groupName))
29312929

29322930
diagnosticTest(#"(?'-')"#, .expectedIdentifier(.groupName), unsupported: true)
2933-
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
2931+
diagnosticTest(#"(?'--')"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
29342932
diagnosticTest(#"(?'a-b-c')"#, .expected("'"), unsupported: true)
29352933

29362934
diagnosticTest("(?x)(? : )", .unknownGroupKind("? "))
@@ -3006,12 +3004,12 @@ extension RegexTests {
30063004
diagnosticTest(#"\g{0}"#, .cannotReferToWholePattern)
30073005
diagnosticTest(#"(?(0))"#, .cannotReferToWholePattern, unsupported: true)
30083006

3009-
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3010-
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3011-
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3012-
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3013-
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), .expected("'"), unsupported: true)
3014-
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), .unbalancedEndOfGroup, .expected(")"), unsupported: true)
3007+
diagnosticTest(#"(?&&)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3008+
diagnosticTest(#"(?&-1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3009+
diagnosticTest(#"(?P>+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3010+
diagnosticTest(#"(?P=+1)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3011+
diagnosticTest(#"\k'#'"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
3012+
diagnosticTest(#"(?&#)"#, .identifierMustBeAlphaNumeric(.groupName), unsupported: true)
30153013

30163014
diagnosticTest(#"(?P>1)"#, .identifierCannotStartWithNumber(.groupName), unsupported: true)
30173015
diagnosticTest(#"\k{1}"#, .identifierCannotStartWithNumber(.groupName), .invalidNamedReference("1"))

0 commit comments

Comments
 (0)