Skip to content

Commit 3c7d34f

Browse files
committed
Implement CharacterClass.anyNonNewline
rdar://97029702
1 parent ae3157c commit 3c7d34f

File tree

6 files changed

+101
-7
lines changed

6 files changed

+101
-7
lines changed

Sources/RegexBuilder/CharacterClass.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ extension RegexComponent where Self == CharacterClass {
4545
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
4646
}
4747

48+
/// A character class that matches any element that is not a newline.
///
/// Built from the `.anyNonNewline` DSL atom, so — unlike `.` in a regex
/// literal — it is not affected by single-line mode (`dotMatchesNewlines`).
public static var anyNonNewline: CharacterClass {
49+
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
50+
}
51+
4852
/// A character class created from the unconverted `._anyGrapheme` class.
///
/// NOTE(review): presumably matches any single grapheme cluster — confirm
/// against the definition of `_anyGrapheme`, which is not visible here.
public static var anyGraphemeCluster: CharacterClass {
4953
.init(unconverted: ._anyGrapheme)
5054
}

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ fileprivate extension Compiler.ByteCodeGen {
5858
case .any:
5959
emitAny()
6060

61+
case .anyNonNewline:
62+
emitAnyNonNewline()
63+
6164
case .dot:
6265
emitDot()
6366

@@ -303,11 +306,7 @@ fileprivate extension Compiler.ByteCodeGen {
303306
}
304307
}
305308

306-
mutating func emitDot() {
307-
if options.dotMatchesNewline {
308-
emitAny()
309-
return
310-
}
309+
mutating func emitAnyNonNewline() {
311310
switch options.semanticLevel {
312311
case .graphemeCluster:
313312
builder.buildConsume { input, bounds in
@@ -324,6 +323,14 @@ fileprivate extension Compiler.ByteCodeGen {
324323
}
325324
}
326325

326+
/// Emits matching code for `.` as written in a regex literal.
///
/// When the `dotMatchesNewline` option is set, this emits the same code as
/// `emitAny()`; otherwise it emits the same code as `emitAnyNonNewline()`.
mutating func emitDot() {
327+
if options.dotMatchesNewline {
328+
emitAny()
329+
} else {
330+
emitAnyNonNewline()
331+
}
332+
}
333+
327334
mutating func emitAlternation(
328335
_ children: [DSLTree.Node]
329336
) throws {

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,22 @@ extension DSLTree.Atom {
123123
}
124124
}
125125

126+
case .anyNonNewline:
127+
switch opts.semanticLevel {
128+
case .graphemeCluster:
129+
return { input, bounds in
130+
input[bounds.lowerBound].isNewline
131+
? nil
132+
: input.index(after: bounds.lowerBound)
133+
}
134+
case .unicodeScalar:
135+
return { input, bounds in
136+
input[bounds.lowerBound].isNewline
137+
? nil
138+
: input.unicodeScalars.index(after: bounds.lowerBound)
139+
}
140+
}
141+
126142
case .dot:
127143
throw Unreachable(".atom(.dot) should be handled by emitDot")
128144

Sources/_StringProcessing/PrintAsPattern.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,6 +1109,9 @@ extension DSLTree.Atom {
11091109
case .any:
11101110
return (".any", true)
11111111

1112+
case .anyNonNewline:
1113+
return (".anyNonNewline", true)
1114+
11121115
case .dot:
11131116
// The DSL does not have an equivalent to '.', so print it as a regex.
11141117
return ("/./", false)
@@ -1155,6 +1158,9 @@ extension DSLTree.Atom {
11551158
case .any:
11561159
return "(?s:.)"
11571160

1161+
case .anyNonNewline:
1162+
return "(?-s:.)"
1163+
11581164
case .dot:
11591165
return "."
11601166

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,10 @@ extension DSLTree {
249249
/// Any character, including newlines.
250250
case any
251251

252+
/// Any character, excluding newlines. This differs from '.', as it is not
253+
/// affected by single line mode.
254+
case anyNonNewline
255+
252256
/// The DSL representation of '.' in a regex literal. This does not match
253257
/// newlines unless single line mode is enabled.
254258
case dot
@@ -875,8 +879,8 @@ extension DSLTree.Atom {
875879
switch self {
876880
case .changeMatchingOptions, .assertion:
877881
return false
878-
case .char, .scalar, .any, .dot, .backreference, .symbolicReference,
879-
.unconverted:
882+
case .char, .scalar, .any, .anyNonNewline, .dot, .backreference,
883+
.symbolicReference, .unconverted:
880884
return true
881885
}
882886
}

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,63 @@ class RegexDSLTests: XCTestCase {
309309
}
310310
}
311311

312+
/// Exercises `CharacterClass.anyNonNewline`, its inverse, and an intersection
/// with an explicit character set, under both semantic levels and with the
/// `dotMatchesNewlines` option both on and off.
///
/// Each tuple handed to `_testDSLCaptures` pairs an input with its expected
/// result. NOTE(review): `nil` presumably means "no match"; the helper is
/// defined elsewhere — confirm.
func testAnyNonNewline() throws {
313+
// `.anyNonNewline` is `.` without single-line mode.
314+
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
315+
for dotMatchesNewline in [true, false] {
316+
// One or more non-newline elements: only the newline-free input is
// expected to match, whatever `dotMatchesNewline` is set to.
try _testDSLCaptures(
317+
("abcdef", "abcdef"),
318+
("abcdef\n", nil),
319+
("\r\n", nil),
320+
("\r", nil),
321+
("\n", nil),
322+
matchType: Substring.self, ==)
323+
{
324+
Regex {
325+
OneOrMore(.anyNonNewline)
326+
}.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
327+
}
328+
329+
// Inverted class: only the inputs made up entirely of newline
// characters are expected to match.
try _testDSLCaptures(
330+
("abcdef", nil),
331+
("abcdef\n", nil),
332+
("\r\n", "\r\n"),
333+
("\r", "\r"),
334+
("\n", "\n"),
335+
matchType: Substring.self, ==)
336+
{
337+
Regex {
338+
OneOrMore(.anyNonNewline.inverted)
339+
}.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
340+
}
341+
342+
// Intersection with a set that itself contains newlines: the newline
// members are expected to be excluded, leaving only "a", "b" and "c".
try _testDSLCaptures(
343+
("abc", "abc"),
344+
("abcd", nil),
345+
("\r\n", nil),
346+
("\r", nil),
347+
("\n", nil),
348+
matchType: Substring.self, ==)
349+
{
350+
Regex {
351+
OneOrMore(CharacterClass.anyNonNewline.intersection(.anyOf("\n\rabc")))
352+
}.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
353+
}
354+
}
355+
}
356+
357+
// A single inverted class with no explicit semantics is expected to match
// the whole of "\r\n".
// NOTE(review): presumably because the default semantic level treats
// "\r\n" as one element — confirm.
try _testDSLCaptures(
358+
("\r\n", "\r\n"), matchType: Substring.self, ==) {
359+
CharacterClass.anyNonNewline.inverted
360+
}
361+
// Under unicode-scalar semantics the same single class is expected not to
// match "\r\n".
// NOTE(review): presumably because the input is then two elements — confirm.
try _testDSLCaptures(
362+
("\r\n", nil), matchType: Substring.self, ==) {
363+
Regex {
364+
CharacterClass.anyNonNewline.inverted
365+
}.matchingSemantics(.unicodeScalar)
366+
}
367+
}
368+
312369
func testMatchResultDotZeroWithoutCapture() throws {
313370
let match = try XCTUnwrap("aaa".wholeMatch { OneOrMore { "a" } })
314371
XCTAssertEqual(match.0, "aaa")

0 commit comments

Comments
 (0)