Skip to content

Commit dd38d67

Browse files
committed
Allow scoped removal of extended syntax in multi-line literals
Relax the ban on unsetting extended syntax in a multi-line literal so that it does not apply to a scoped unset, e.g. `(?-x:...)`, as long as the group does not span multiple lines.
1 parent 7bdc6c6 commit dd38d67

File tree

4 files changed

+108
-20
lines changed

4 files changed

+108
-20
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ enum ParseError: Error, Hashable {
4545
case confusableCharacter(Character)
4646

4747
case quoteMayNotSpanMultipleLines
48+
case unsetExtendedSyntaxMayNotSpanMultipleLines
4849

4950
case cannotReferToWholePattern
5051

@@ -143,6 +144,8 @@ extension ParseError: CustomStringConvertible {
143144
return "'\(c)' is confusable for a metacharacter; use '\\u{...}' instead"
144145
case .quoteMayNotSpanMultipleLines:
145146
return "quoted sequence may not span multiple lines in multi-line literal"
147+
case .unsetExtendedSyntaxMayNotSpanMultipleLines:
148+
return "group that unsets extended syntax may not span multiple lines in multi-line literal"
146149
case .cannotReferToWholePattern:
147150
return "cannot refer to whole pattern here"
148151
case .quantifierRequiresOperand(let q):

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,6 @@ extension Source {
835835
if opt.isSemanticMatchingLevel {
836836
throw ParseError.cannotRemoveSemanticsOptions
837837
}
838-
// Extended syntax may not be removed if in multi-line mode.
839-
if context.syntax.contains(.multilineLiteral) && opt.isAnyExtended {
840-
throw ParseError.cannotRemoveExtendedSyntaxInMultilineMode
841-
}
842838
removing.append(opt)
843839
}
844840
return .init(caretLoc: nil, adding: adding, minusLoc: ateMinus.location,

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -289,12 +289,16 @@ extension Parser {
289289
/// Apply the syntax options of a given matching option sequence to the
290290
/// current set of options.
291291
private mutating func applySyntaxOptions(
292-
of opts: AST.MatchingOptionSequence
293-
) {
292+
of opts: AST.MatchingOptionSequence, isScoped: Bool
293+
) throws {
294294
func mapOption(_ option: SyntaxOptions,
295295
_ pred: (AST.MatchingOption) -> Bool) {
296296
if opts.resetsCurrentOptions {
297-
context.syntax.remove(option)
297+
// Extended syntax is never reset in a multi-line literal.
298+
if !(option == .extendedSyntax &&
299+
context.syntax.contains(.multilineLiteral)) {
300+
context.syntax.remove(option)
301+
}
298302
}
299303
if opts.adding.contains(where: pred) {
300304
context.syntax.insert(option)
@@ -311,22 +315,42 @@ extension Parser {
311315
mapOption(.namedCapturesOnly, .namedCapturesOnly)
312316

313317
// (?x), (?xx)
314-
// We skip this for multi-line, as extended syntax is always enabled there.
318+
// We skip this for unscoped multi-line, as extended syntax is always
319+
// enabled there. A scoped group however (?-x:...) allows extended syntax to
320+
// be disabled.
315321
// TODO: PCRE differentiates between (?x) and (?xx) where only the latter
316322
// handles non-semantic whitespace in a custom character class. Other
317323
// engines such as Oniguruma, Java, and ICU do this under (?x). Therefore,
318324
// treat (?x) and (?xx) as the same option here. If we ever get a strict
319325
// PCRE mode, we will need to change this to handle that.
320-
if !context.syntax.contains(.multilineLiteral) {
326+
if !isScoped && context.syntax.contains(.multilineLiteral) {
327+
// An unscoped removal of extended syntax is not allowed in a multi-line
328+
// literal.
329+
if let opt = opts.removing.first(where: \.isAnyExtended) {
330+
throw Source.LocatedError(
331+
ParseError.cannotRemoveExtendedSyntaxInMultilineMode, opt.location)
332+
}
333+
// We may have an unscoped change of extended syntax, but it must either
334+
// be:
335+
// - An addition of extended syntax
336+
// - A reset of matching options
337+
// The former is redundant, and the latter shouldn't affect extended
338+
// syntax in a multi-line literal. So we don't need to do anything extra
339+
// here.
340+
} else {
341+
// We either have a scoped change of extended syntax, or this is a
342+
// single-line literal.
321343
mapOption(.extendedSyntax, \.isAnyExtended)
322344
}
323345
}
324346

325347
/// Apply the syntax options of a matching option changing group to the
326348
/// current set of options.
327-
private mutating func applySyntaxOptions(of group: AST.Group.Kind) {
349+
private mutating func applySyntaxOptions(
350+
of group: AST.Group.Kind, isScoped: Bool
351+
) throws {
328352
if case .changeMatchingOptions(let seq) = group {
329-
applySyntaxOptions(of: seq)
353+
try applySyntaxOptions(of: seq, isScoped: isScoped)
330354
}
331355
}
332356

@@ -337,14 +361,25 @@ extension Parser {
337361
context.recordGroup(kind.value)
338362

339363
let currentSyntax = context.syntax
340-
applySyntaxOptions(of: kind.value)
364+
try applySyntaxOptions(of: kind.value, isScoped: true)
341365
defer {
342366
context.syntax = currentSyntax
343367
}
344-
368+
let unsetsExtendedSyntax = currentSyntax.contains(.extendedSyntax) &&
369+
!context.syntax.contains(.extendedSyntax)
345370
let child = try parseNode()
346371
try source.expect(")")
347-
return .init(kind, child, loc(start))
372+
let groupLoc = loc(start)
373+
374+
// In multi-line literals, the body of a group that unsets extended syntax
375+
// may not span multiple lines.
376+
if unsetsExtendedSyntax &&
377+
context.syntax.contains(.multilineLiteral) &&
378+
source[child.location.range].spansMultipleLinesInRegexLiteral {
379+
throw Source.LocatedError(
380+
ParseError.unsetExtendedSyntaxMayNotSpanMultipleLines, groupLoc)
381+
}
382+
return .init(kind, child, groupLoc)
348383
}
349384

350385
/// Consume the body of an absent function.
@@ -438,7 +473,7 @@ extension Parser {
438473
// If we have a change matching options atom, apply the syntax options. We
439474
// already take care of scoping syntax options within a group.
440475
if case .changeMatchingOptions(let opts) = atom.kind {
441-
applySyntaxOptions(of: opts)
476+
try applySyntaxOptions(of: opts, isScoped: false)
442477
}
443478
// TODO: track source locations
444479
return .atom(atom)
@@ -592,7 +627,7 @@ public func parse<S: StringProtocol>(
592627
return ast
593628
}
594629

595-
extension String {
630+
extension StringProtocol {
596631
/// Whether the given string is considered multi-line for a regex literal.
597632
var spansMultipleLinesInRegexLiteral: Bool {
598633
unicodeScalars.contains(where: { $0 == "\n" || $0 == "\r" })

Tests/RegexTests/ParseTests.swift

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1777,6 +1777,13 @@ extension RegexTests {
17771777
" ", "b"
17781778
)
17791779
)
1780+
parseTest(
1781+
"(?x) a (?^: b)", concat(
1782+
changeMatchingOptions(matchingOptions(adding: .extended)),
1783+
"a",
1784+
changeMatchingOptions(unsetMatchingOptions(), concat(" ", "b"))
1785+
)
1786+
)
17801787

17811788
parseTest("[ # abc]", charClass(" ", "#", " ", "a", "b", "c"))
17821789
parseTest("[#]", charClass("#"))
@@ -2193,6 +2200,14 @@ extension RegexTests {
21932200
/#
21942201
""", changeMatchingOptions(unsetMatchingOptions())
21952202
)
2203+
parseWithDelimitersTest("""
2204+
#/
2205+
(?^:
2206+
# comment
2207+
)
2208+
/#
2209+
""", changeMatchingOptions(unsetMatchingOptions(), empty())
2210+
)
21962211

21972212
// (?x) has no effect.
21982213
parseWithDelimitersTest("""
@@ -2203,6 +2218,27 @@ extension RegexTests {
22032218
""", changeMatchingOptions(matchingOptions(adding: .extended))
22042219
)
22052220

2221+
// Scoped removal of extended syntax is allowed as long as it does not span
2222+
// multiple lines.
2223+
parseWithDelimitersTest("""
2224+
#/
2225+
(?-x:a b)
2226+
/#
2227+
""", changeMatchingOptions(
2228+
matchingOptions(removing: .extended),
2229+
concat("a", " ", "b")
2230+
)
2231+
)
2232+
parseWithDelimitersTest("""
2233+
#/
2234+
(?-xx:a b)
2235+
/#
2236+
""", changeMatchingOptions(
2237+
matchingOptions(removing: .extraExtended),
2238+
concat("a", " ", "b")
2239+
)
2240+
)
2241+
22062242
parseWithDelimitersTest(#"""
22072243
#/
22082244
\p{
@@ -2782,17 +2818,35 @@ extension RegexTests {
27822818
/#
27832819
""", .cannotRemoveExtendedSyntaxInMultilineMode
27842820
)
2821+
2822+
// Scoped removal of extended syntax may not span multiple lines
27852823
diagnosticWithDelimitersTest("""
27862824
#/
2787-
(?-x:a b)
2825+
(?-x:a b
2826+
)
27882827
/#
2789-
""", .cannotRemoveExtendedSyntaxInMultilineMode
2828+
""", .unsetExtendedSyntaxMayNotSpanMultipleLines
27902829
)
27912830
diagnosticWithDelimitersTest("""
27922831
#/
2793-
(?-xx:a b)
2832+
(?-x:a
2833+
b)
27942834
/#
2795-
""", .cannotRemoveExtendedSyntaxInMultilineMode
2835+
""", .unsetExtendedSyntaxMayNotSpanMultipleLines
2836+
)
2837+
diagnosticWithDelimitersTest("""
2838+
#/
2839+
(?-xx:
2840+
a b)
2841+
/#
2842+
""", .unsetExtendedSyntaxMayNotSpanMultipleLines
2843+
)
2844+
diagnosticWithDelimitersTest("""
2845+
#/
2846+
(?x-x:
2847+
a b)
2848+
/#
2849+
""", .unsetExtendedSyntaxMayNotSpanMultipleLines
27962850
)
27972851

27982852
diagnosticWithDelimitersTest(#"""

0 commit comments

Comments
 (0)