Skip to content

Commit 12fcb52

Browse files
committed
Fix scalar mode for quoted sequences in character class
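Previously, a quoted sequence inside a custom character class could only match entire characters (grapheme clusters), so it did not behave correctly in Unicode scalar semantic mode. It now uses the generic Character consumer logic, which handles scalar semantic mode. rdar://97209131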
1 parent d30a26e commit 12fcb52

File tree

2 files changed: 56 additions and 14 deletions.

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ extension DSLTree._AST.Atom {
6363
extension Character {
6464
func generateConsumer(
6565
_ opts: MatchingOptions
66-
) throws -> MEProgram.ConsumeFunction? {
66+
) throws -> MEProgram.ConsumeFunction {
6767
let isCaseInsensitive = opts.isCaseInsensitive
6868
switch opts.semanticLevel {
6969
case .graphemeCluster:
@@ -456,21 +456,17 @@ extension DSLTree.CustomCharacterClass.Member {
456456
}
457457
return rhs(input, bounds)
458458
}
459-
case .quotedLiteral(let s):
460-
if opts.isCaseInsensitive {
461-
return { input, bounds in
462-
guard s.lowercased()._contains(input[bounds.lowerBound].lowercased()) else {
463-
return nil
464-
}
465-
return input.index(after: bounds.lowerBound)
466-
}
467-
} else {
468-
return { input, bounds in
469-
guard s.contains(input[bounds.lowerBound]) else {
470-
return nil
459+
case .quotedLiteral(let str):
460+
let consumers = try str.map {
461+
try $0.generateConsumer(opts)
462+
}
463+
return { input, bounds in
464+
for fn in consumers {
465+
if let idx = fn(input, bounds) {
466+
return idx
471467
}
472-
return input.index(after: bounds.lowerBound)
473468
}
469+
return nil
474470
}
475471
case .trivia:
476472
// TODO: Should probably strip this earlier...

Tests/RegexTests/MatchTests.swift

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ func firstMatchTests(
191191
enableTracing: Bool = false,
192192
dumpAST: Bool = false,
193193
xfail: Bool = false,
194+
semanticLevel: RegexSemanticLevel = .graphemeCluster,
194195
file: StaticString = #filePath,
195196
line: UInt = #line
196197
) {
@@ -203,6 +204,7 @@ func firstMatchTests(
203204
enableTracing: enableTracing,
204205
dumpAST: dumpAST,
205206
xfail: xfail,
207+
semanticLevel: semanticLevel,
206208
file: file,
207209
line: line)
208210
}
@@ -728,6 +730,50 @@ extension RegexTests {
728730
("a\u{301}", true),
729731
semanticLevel: .unicodeScalar)
730732

733+
// Scalar matching in quoted sequences.
734+
firstMatchTests(
735+
"[\\Qe\u{301}\\E]",
736+
("e", nil),
737+
("E", nil),
738+
("\u{301}", nil),
739+
(eDecomposed, eDecomposed),
740+
(eComposed, eComposed),
741+
("E\u{301}", nil),
742+
("\u{C9}", nil)
743+
)
744+
firstMatchTests(
745+
"[\\Qe\u{301}\\E]",
746+
("e", "e"),
747+
("E", nil),
748+
("\u{301}", "\u{301}"),
749+
(eDecomposed, "e"),
750+
(eComposed, nil),
751+
("E\u{301}", "\u{301}"),
752+
("\u{C9}", nil),
753+
semanticLevel: .unicodeScalar
754+
)
755+
firstMatchTests(
756+
"(?i)[\\Qe\u{301}\\E]",
757+
("e", nil),
758+
("E", nil),
759+
("\u{301}", nil),
760+
(eDecomposed, eDecomposed),
761+
(eComposed, eComposed),
762+
("E\u{301}", "E\u{301}"),
763+
("\u{C9}", "\u{C9}")
764+
)
765+
firstMatchTests(
766+
"(?i)[\\Qe\u{301}\\E]",
767+
("e", "e"),
768+
("E", "E"),
769+
("\u{301}", "\u{301}"),
770+
(eDecomposed, "e"),
771+
(eComposed, nil),
772+
("E\u{301}", "E"),
773+
("\u{C9}", nil),
774+
semanticLevel: .unicodeScalar
775+
)
776+
731777
firstMatchTest("[-]", input: "123-abcxyz", match: "-")
732778

733779
// These are metacharacters in certain contexts, but normal characters

0 commit comments

Comments
 (0)