Skip to content

Commit 0029e7c

Browse files
committed
Allow unbounded quoted sequences \Q...
PCRE and ICU both support quoted sequences that don't have a terminating `\E`. Update the parsing to allow this.
1 parent 471e073 commit 0029e7c

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ extension Source {
579579

580580
/// Try to consume quoted content
581581
///
582-
/// Quote -> '\Q' (!'\E' .)* '\E'
582+
/// Quote -> '\Q' (!'\E' .)* '\E'?
583583
///
584584
/// With `SyntaxOptions.experimentalQuotes`, also accepts
585585
///
@@ -592,9 +592,21 @@ extension Source {
592592
mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? {
593593
let str = try recordLoc { src -> String? in
594594
if src.tryEat(sequence: #"\Q"#) {
595-
return try src.expectQuoted(endingWith: #"\E"#).value
595+
let contents = src.lexUntil { src in
596+
src.isEmpty || src.starts(with: #"\E"#)
597+
}.value
598+
599+
// If we have an ending, the sequence shouldn't be empty. This isn't a
600+
// particularly useful thing to express, and [\Q\E] should definitely be
601+
// illegal. The unbounded case may however be empty.
602+
let ending = src.tryEat(sequence: #"\E"#)
603+
if ending && contents.isEmpty {
604+
throw ParseError.expectedNonEmptyContents
605+
}
606+
return contents
596607
}
597608
if context.experimentalQuotes, src.tryEat("\"") {
609+
// TODO: Can experimental quotes be empty?
598610
return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
599611
}
600612
return nil

Tests/RegexTests/ParseTests.swift

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,11 @@ extension RegexTests {
754754
// This follows the PCRE behavior.
755755
parseTest(#"\Q\\E"#, quote("\\"))
756756

757+
// Quotes may be unterminated.
758+
parseTest(#"\Qab"#, quote("ab"))
759+
parseTest(#"\Q"#, quote(""))
760+
parseTest("\\Qab\\", quote("ab\\"))
761+
757762
parseTest(#"a" ."b"#, concat("a", quote(" ."), "b"),
758763
syntax: .experimental)
759764
parseTest(#"a" .""b""#, concat("a", quote(" ."), quote("b")),
@@ -2539,8 +2544,6 @@ extension RegexTests {
25392544
diagnosticTest(#"(?P"#, .expected(")"))
25402545
diagnosticTest(#"(?R"#, .expected(")"))
25412546

2542-
diagnosticTest(#"\Qab"#, .expected("\\E"))
2543-
diagnosticTest("\\Qab\\", .expected("\\E"))
25442547
diagnosticTest(#""ab"#, .expected("\""), syntax: .experimental)
25452548
diagnosticTest(#""ab\""#, .expected("\""), syntax: .experimental)
25462549
diagnosticTest("\"ab\\", .expectedEscape, syntax: .experimental)
@@ -2619,6 +2622,10 @@ extension RegexTests {
26192622
// TODO: Custom diagnostic for missing '\Q'
26202623
diagnosticTest(#"\E"#, .invalidEscape("E"))
26212624

2625+
diagnosticTest(#"\Q\E"#, .expectedNonEmptyContents)
2626+
diagnosticTest(#"[\Q\E]"#, .expectedNonEmptyContents)
2627+
diagnosticTest(#"[\Q]"#, .expected("]"))
2628+
26222629
// PCRE treats these as octal, but we require a `0` prefix.
26232630
diagnosticTest(#"[\1]"#, .invalidEscape("1"))
26242631
diagnosticTest(#"[\123]"#, .invalidEscape("1"))

0 commit comments

Comments
 (0)