Skip to content

Commit 1bcd2cb

Browse files
committed
Allow unbounded quoted sequences \Q...
PCRE and ICU both support quoted sequences that don't have a terminating `\E`. Update the parsing to allow this.
1 parent 88dc9dd commit 1bcd2cb

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ extension Source {
565565

566566
/// Try to consume quoted content
567567
///
568-
/// Quote -> '\Q' (!'\E' .)* '\E'
568+
/// Quote -> '\Q' (!'\E' .)* '\E'?
569569
///
570570
/// With `SyntaxOptions.experimentalQuotes`, also accepts
571571
///
@@ -578,9 +578,21 @@ extension Source {
578578
mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? {
579579
let str = try recordLoc { src -> String? in
580580
if src.tryEat(sequence: #"\Q"#) {
581-
return try src.expectQuoted(endingWith: #"\E"#).value
581+
let contents = src.lexUntil { src in
582+
src.isEmpty || src.starts(with: #"\E"#)
583+
}.value
584+
585+
// If we have an ending, the sequence shouldn't be empty. This isn't a
586+
// particularly useful thing to express, and [\Q\E] should definitely be
587+
// illegal. The unbounded case may however be empty.
588+
let ending = src.tryEat(sequence: #"\E"#)
589+
if ending && contents.isEmpty {
590+
throw ParseError.expectedNonEmptyContents
591+
}
592+
return contents
582593
}
583594
if context.experimentalQuotes, src.tryEat("\"") {
595+
// TODO: Can experimental quotes be empty?
584596
return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
585597
}
586598
return nil

Tests/RegexTests/ParseTests.swift

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,11 @@ extension RegexTests {
742742
// This follows the PCRE behavior.
743743
parseTest(#"\Q\\E"#, quote("\\"))
744744

745+
// Quotes may be unterminated.
746+
parseTest(#"\Qab"#, quote("ab"))
747+
parseTest(#"\Q"#, quote(""))
748+
parseTest("\\Qab\\", quote("ab\\"))
749+
745750
parseTest(#"a" ."b"#, concat("a", quote(" ."), "b"),
746751
syntax: .experimental)
747752
parseTest(#"a" .""b""#, concat("a", quote(" ."), quote("b")),
@@ -2486,8 +2491,6 @@ extension RegexTests {
24862491
diagnosticTest(#"(?P"#, .expected(")"))
24872492
diagnosticTest(#"(?R"#, .expected(")"))
24882493

2489-
diagnosticTest(#"\Qab"#, .expected("\\E"))
2490-
diagnosticTest("\\Qab\\", .expected("\\E"))
24912494
diagnosticTest(#""ab"#, .expected("\""), syntax: .experimental)
24922495
diagnosticTest(#""ab\""#, .expected("\""), syntax: .experimental)
24932496
diagnosticTest("\"ab\\", .expectedEscape, syntax: .experimental)
@@ -2560,6 +2563,10 @@ extension RegexTests {
25602563
// TODO: Custom diagnostic for missing '\Q'
25612564
diagnosticTest(#"\E"#, .invalidEscape("E"))
25622565

2566+
diagnosticTest(#"\Q\E"#, .expectedNonEmptyContents)
2567+
diagnosticTest(#"[\Q\E]"#, .expectedNonEmptyContents)
2568+
diagnosticTest(#"[\Q]"#, .expected("]"))
2569+
25632570
// PCRE treats these as octal, but we require a `0` prefix.
25642571
diagnosticTest(#"[\1]"#, .invalidEscape("1"))
25652572
diagnosticTest(#"[\123]"#, .invalidEscape("1"))

0 commit comments

Comments
 (0)