Skip to content

Commit 2265620

Browse files
authored
Merge pull request #432 from hamishknight/unbounded-quote
2 parents 857dd7b + 57a5092 commit 2265620

File tree

4 files changed

+60
-6
lines changed

4 files changed

+60
-6
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ enum ParseError: Error, Hashable {
4444
case invalidEscape(Character)
4545
case confusableCharacter(Character)
4646

47+
case quoteMayNotSpanMultipleLines
48+
4749
case cannotReferToWholePattern
4850

4951
case quantifierRequiresOperand(String)
@@ -139,6 +141,8 @@ extension ParseError: CustomStringConvertible {
139141
return "invalid escape sequence '\\\(c)'"
140142
case .confusableCharacter(let c):
141143
return "'\(c)' is confusable for a metacharacter; use '\\u{...}' instead"
144+
case .quoteMayNotSpanMultipleLines:
145+
return "quoted sequence may not span multiple lines in multi-line literal"
142146
case .cannotReferToWholePattern:
143147
return "cannot refer to whole pattern here"
144148
case .quantifierRequiresOperand(let q):

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ extension Source {
579579

580580
/// Try to consume quoted content
581581
///
582-
/// Quote -> '\Q' (!'\E' .)* '\E'
582+
/// Quote -> '\Q' (!'\E' .)* '\E'?
583583
///
584584
/// With `SyntaxOptions.experimentalQuotes`, also accepts
585585
///
@@ -592,9 +592,24 @@ extension Source {
592592
mutating func lexQuote(context: ParsingContext) throws -> AST.Quote? {
593593
let str = try recordLoc { src -> String? in
594594
if src.tryEat(sequence: #"\Q"#) {
595-
return try src.expectQuoted(endingWith: #"\E"#).value
595+
let contents = src.lexUntil { src in
596+
src.isEmpty || src.tryEat(sequence: #"\E"#)
597+
}.value
598+
599+
// In multi-line literals, the quote may not span multiple lines.
600+
if context.syntax.contains(.multilineExtendedSyntax),
601+
contents.spansMultipleLinesInRegexLiteral {
602+
throw ParseError.quoteMayNotSpanMultipleLines
603+
}
604+
605+
// The sequence must not be empty in a custom character class.
606+
if context.isInCustomCharacterClass && contents.isEmpty {
607+
throw ParseError.expectedNonEmptyContents
608+
}
609+
return contents
596610
}
597611
if context.experimentalQuotes, src.tryEat("\"") {
612+
// TODO: Can experimental quotes be empty?
598613
return try src.expectQuoted(endingWith: "\"", ignoreEscaped: true).value
599614
}
600615
return nil

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,13 @@ public func parse<S: StringProtocol>(
592592
return ast
593593
}
594594

595+
extension String {
596+
/// Whether this string is considered multi-line for a regex literal, i.e. whether it contains a `\n` or `\r` scalar.
597+
var spansMultipleLinesInRegexLiteral: Bool {
598+
unicodeScalars.contains(where: { $0 == "\n" || $0 == "\r" })
599+
}
600+
}
601+
595602
/// Retrieve the default set of syntax options that a delimiter and literal
596603
/// contents indicates.
597604
fileprivate func defaultSyntaxOptions(
@@ -601,8 +608,7 @@ fileprivate func defaultSyntaxOptions(
601608
case .forwardSlash:
602609
// For an extended syntax forward slash, e.g. #/.../#, extended syntax is
603610
// permitted if it spans multiple lines.
604-
if delim.poundCount > 0 &&
605-
contents.unicodeScalars.contains(where: { $0 == "\n" || $0 == "\r" }) {
611+
if delim.poundCount > 0 && contents.spansMultipleLinesInRegexLiteral {
606612
return .multilineExtendedSyntax
607613
}
608614
return .traditional

Tests/RegexTests/ParseTests.swift

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -756,6 +756,14 @@ extension RegexTests {
756756
// This follows the PCRE behavior.
757757
parseTest(#"\Q\\E"#, quote("\\"))
758758

759+
// ICU allows quotes to be empty outside of custom character classes.
760+
parseTest(#"\Q\E"#, quote(""))
761+
762+
// Quotes may be unterminated.
763+
parseTest(#"\Qab"#, quote("ab"))
764+
parseTest(#"\Q"#, quote(""))
765+
parseTest("\\Qab\\", quote("ab\\"))
766+
759767
parseTest(#"a" ."b"#, concat("a", quote(" ."), "b"),
760768
syntax: .experimental)
761769
parseTest(#"a" .""b""#, concat("a", quote(" ."), quote("b")),
@@ -2596,8 +2604,6 @@ extension RegexTests {
25962604
diagnosticTest(#"(?P"#, .expected(")"))
25972605
diagnosticTest(#"(?R"#, .expected(")"))
25982606

2599-
diagnosticTest(#"\Qab"#, .expected("\\E"))
2600-
diagnosticTest("\\Qab\\", .expected("\\E"))
26012607
diagnosticTest(#""ab"#, .expected("\""), syntax: .experimental)
26022608
diagnosticTest(#""ab\""#, .expected("\""), syntax: .experimental)
26032609
diagnosticTest("\"ab\\", .expectedEscape, syntax: .experimental)
@@ -2676,6 +2682,9 @@ extension RegexTests {
26762682
// TODO: Custom diagnostic for missing '\Q'
26772683
diagnosticTest(#"\E"#, .invalidEscape("E"))
26782684

2685+
diagnosticTest(#"[\Q\E]"#, .expectedNonEmptyContents)
2686+
diagnosticTest(#"[\Q]"#, .expected("]"))
2687+
26792688
// PCRE treats these as octal, but we require a `0` prefix.
26802689
diagnosticTest(#"[\1]"#, .invalidEscape("1"))
26812690
diagnosticTest(#"[\123]"#, .invalidEscape("1"))
@@ -2772,6 +2781,26 @@ extension RegexTests {
27722781
""", .cannotRemoveExtendedSyntaxInMultilineMode
27732782
)
27742783

2784+
diagnosticWithDelimitersTest(#"""
2785+
#/
2786+
\Q
2787+
\E
2788+
/#
2789+
"""#, .quoteMayNotSpanMultipleLines)
2790+
2791+
diagnosticWithDelimitersTest(#"""
2792+
#/
2793+
\Qabc
2794+
\E
2795+
/#
2796+
"""#, .quoteMayNotSpanMultipleLines)
2797+
2798+
diagnosticWithDelimitersTest(#"""
2799+
#/
2800+
\Q
2801+
/#
2802+
"""#, .quoteMayNotSpanMultipleLines)
2803+
27752804
// MARK: Group specifiers
27762805

27772806
diagnosticTest(#"(*"#, .unknownGroupKind("*"))

0 commit comments

Comments
 (0)