Skip to content

Commit c8c21aa

Browse files
committed
Allow unterminated and empty quoted sequences
PCRE and ICU both support quoted sequences that lack a terminating `\E`, and both allow such unterminated sequences to be empty. ICU additionally allows a terminated `\Q\E` sequence to be empty, so allow quoted sequences to be empty in general.
1 parent 88dc9dd commit c8c21aa

File tree

2 files changed

+13
-4
lines changed

2 files changed

+13
-4
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ extension Source {
565565

566566
/// Try to consume quoted content
567567
///
568-
/// Quote -> '\Q' (!'\E' .)* '\E'
568+
/// Quote -> '\Q' (!'\E' .)* '\E'?
569569
///
570570
/// With `SyntaxOptions.experimentalQuotes`, also accepts
571571
///
@@ -578,9 +578,10 @@ extension Source {
578578
mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? {
579579
let str = try recordLoc { src -> String? in
580580
if src.tryEat(sequence: #"\Q"#) {
581-
return try src.expectQuoted(endingWith: #"\E"#).value
581+
return src.lexUntil { $0.isEmpty || $0.tryEat(sequence: #"\E"#) }.value
582582
}
583583
if context.experimentalQuotes, src.tryEat("\"") {
584+
// TODO: Can experimental quotes be empty?
584585
return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
585586
}
586587
return nil

Tests/RegexTests/ParseTests.swift

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,14 @@ extension RegexTests {
742742
// This follows the PCRE behavior.
743743
parseTest(#"\Q\\E"#, quote("\\"))
744744

745+
// ICU allows empty quotes.
746+
parseTest(#"\Q\E"#, quote(""))
747+
748+
// Quotes may be unterminated.
749+
parseTest(#"\Qab"#, quote("ab"))
750+
parseTest(#"\Q"#, quote(""))
751+
parseTest("\\Qab\\", quote("ab\\"))
752+
745753
parseTest(#"a" ."b"#, concat("a", quote(" ."), "b"),
746754
syntax: .experimental)
747755
parseTest(#"a" .""b""#, concat("a", quote(" ."), quote("b")),
@@ -2486,8 +2494,6 @@ extension RegexTests {
24862494
diagnosticTest(#"(?P"#, .expected(")"))
24872495
diagnosticTest(#"(?R"#, .expected(")"))
24882496

2489-
diagnosticTest(#"\Qab"#, .expected("\\E"))
2490-
diagnosticTest("\\Qab\\", .expected("\\E"))
24912497
diagnosticTest(#""ab"#, .expected("\""), syntax: .experimental)
24922498
diagnosticTest(#""ab\""#, .expected("\""), syntax: .experimental)
24932499
diagnosticTest("\"ab\\", .expectedEscape, syntax: .experimental)
@@ -2560,6 +2566,8 @@ extension RegexTests {
25602566
// TODO: Custom diagnostic for missing '\Q'
25612567
diagnosticTest(#"\E"#, .invalidEscape("E"))
25622568

2569+
diagnosticTest(#"[\Q]"#, .expected("]"))
2570+
25632571
// PCRE treats these as octal, but we require a `0` prefix.
25642572
diagnosticTest(#"[\1]"#, .invalidEscape("1"))
25652573
diagnosticTest(#"[\123]"#, .invalidEscape("1"))

0 commit comments

Comments
 (0)