Skip to content

Commit 8087a3d

Browse files
authored
Merge pull request #583 from hamishknight/character-work-5.7
2 parents 65178f9 + 3c7d34f commit 8087a3d

18 files changed

+663
-502
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -37,16 +37,30 @@ public struct Anchor {
3737

3838
@available(SwiftStdlib 5.7, *)
3939
extension Anchor: RegexComponent {
40-
var baseAssertion: DSLTree._AST.AssertionKind {
40+
var baseAssertion: DSLTree.Atom.Assertion {
4141
switch kind {
42-
case .startOfSubject: return .startOfSubject(isInverted)
43-
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
44-
case .endOfSubject: return .endOfSubject(isInverted)
45-
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
46-
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
47-
case .startOfLine: return .startOfLine(isInverted)
48-
case .endOfLine: return .endOfLine(isInverted)
49-
case .wordBoundary: return .wordBoundary(isInverted)
42+
case .startOfSubject:
43+
// FIXME: Inverted?
44+
return .startOfSubject
45+
case .endOfSubjectBeforeNewline:
46+
// FIXME: Inverted?
47+
return .endOfSubjectBeforeNewline
48+
case .endOfSubject:
49+
// FIXME: Inverted?
50+
return .endOfSubject
51+
case .firstMatchingPositionInSubject:
52+
// FIXME: Inverted?
53+
return .firstMatchingPositionInSubject
54+
case .textSegmentBoundary:
55+
return isInverted ? .notTextSegment : .textSegment
56+
case .startOfLine:
57+
// FIXME: Inverted?
58+
return .startOfLine
59+
case .endOfLine:
60+
// FIXME: Inverted?
61+
return .endOfLine
62+
case .wordBoundary:
63+
return isInverted ? .notWordBoundary : .wordBoundary
5064
}
5165
}
5266

Sources/RegexBuilder/CharacterClass.swift

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -20,11 +20,8 @@ public struct CharacterClass {
2020
self.ccc = ccc
2121
}
2222

23-
init(unconverted model: _CharacterClassModel) {
24-
guard let ccc = model.makeDSLTreeCharacterClass() else {
25-
fatalError("Unsupported character class")
26-
}
27-
self.ccc = ccc
23+
init(unconverted atom: DSLTree._AST.Atom) {
24+
self.ccc = .init(members: [.atom(.unconverted(atom))])
2825
}
2926
}
3027

@@ -48,16 +45,20 @@ extension RegexComponent where Self == CharacterClass {
4845
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
4946
}
5047

48+
public static var anyNonNewline: CharacterClass {
49+
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
50+
}
51+
5152
public static var anyGraphemeCluster: CharacterClass {
52-
.init(unconverted: .anyGrapheme)
53+
.init(unconverted: ._anyGrapheme)
5354
}
5455

5556
public static var whitespace: CharacterClass {
56-
.init(unconverted: .whitespace)
57+
.init(unconverted: ._whitespace)
5758
}
5859

5960
public static var digit: CharacterClass {
60-
.init(unconverted: .digit)
61+
.init(unconverted: ._digit)
6162
}
6263

6364
public static var hexDigit: CharacterClass {
@@ -69,19 +70,19 @@ extension RegexComponent where Self == CharacterClass {
6970
}
7071

7172
public static var horizontalWhitespace: CharacterClass {
72-
.init(unconverted: .horizontalWhitespace)
73+
.init(unconverted: ._horizontalWhitespace)
7374
}
7475

7576
public static var newlineSequence: CharacterClass {
76-
.init(unconverted: .newlineSequence)
77+
.init(unconverted: ._newlineSequence)
7778
}
7879

7980
public static var verticalWhitespace: CharacterClass {
80-
.init(unconverted: .verticalWhitespace)
81+
.init(unconverted: ._verticalWhitespace)
8182
}
8283

8384
public static var word: CharacterClass {
84-
.init(unconverted: .word)
85+
.init(unconverted: ._word)
8586
}
8687
}
8788

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 11 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -60,13 +60,13 @@ extension AST {
6060
case namedCharacter(String)
6161

6262
/// .
63-
case any
63+
case dot
6464

6565
/// ^
66-
case startOfLine
66+
case caretAnchor
6767

6868
/// $
69-
case endOfLine
69+
case dollarAnchor
7070

7171
// References
7272
case backreference(Reference)
@@ -104,9 +104,9 @@ extension AST.Atom {
104104
case .callout(let v): return v
105105
case .backtrackingDirective(let v): return v
106106
case .changeMatchingOptions(let v): return v
107-
case .any: return nil
108-
case .startOfLine: return nil
109-
case .endOfLine: return nil
107+
case .dot: return nil
108+
case .caretAnchor: return nil
109+
case .dollarAnchor: return nil
110110
case .invalid: return nil
111111
}
112112
}
@@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
511511
}
512512
}
513513

514-
extension AST.Atom {
515-
/// Anchors and other built-in zero-width assertions.
516-
public enum AssertionKind: String, Hashable {
517-
/// \A
518-
case startOfSubject = #"\A"#
519-
520-
/// \Z
521-
case endOfSubjectBeforeNewline = #"\Z"#
522-
523-
/// \z
524-
case endOfSubject = #"\z"#
525-
526-
/// \K
527-
case resetStartOfMatch = #"\K"#
528-
529-
/// \G
530-
case firstMatchingPositionInSubject = #"\G"#
531-
532-
/// \y
533-
case textSegment = #"\y"#
534-
535-
/// \Y
536-
case notTextSegment = #"\Y"#
537-
538-
/// ^
539-
case startOfLine = #"^"#
540-
541-
/// $
542-
case endOfLine = #"$"#
543-
544-
/// \b (from outside a custom character class)
545-
case wordBoundary = #"\b"#
546-
547-
/// \B
548-
case notWordBoundary = #"\B"#
549-
550-
}
551-
552-
public var assertionKind: AssertionKind? {
553-
switch kind {
554-
case .startOfLine: return .startOfLine
555-
case .endOfLine: return .endOfLine
556-
557-
case .escaped(.wordBoundary): return .wordBoundary
558-
case .escaped(.notWordBoundary): return .notWordBoundary
559-
case .escaped(.startOfSubject): return .startOfSubject
560-
case .escaped(.endOfSubject): return .endOfSubject
561-
case .escaped(.textSegment): return .textSegment
562-
case .escaped(.notTextSegment): return .notTextSegment
563-
case .escaped(.endOfSubjectBeforeNewline):
564-
return .endOfSubjectBeforeNewline
565-
case .escaped(.firstMatchingPositionInSubject):
566-
return .firstMatchingPositionInSubject
567-
568-
case .escaped(.resetStartOfMatch): return .resetStartOfMatch
569-
570-
default: return nil
571-
}
572-
}
573-
}
574-
575514
extension AST.Atom {
576515
public enum Callout: Hashable {
577516
/// A PCRE callout written `(?C...)`
@@ -806,9 +745,9 @@ extension AST.Atom {
806745
// the AST? Or defer for the matching engine?
807746
return nil
808747

809-
case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
810-
.backreference, .subpattern, .callout, .backtrackingDirective,
811-
.changeMatchingOptions, .invalid:
748+
case .scalarSequence, .property, .dot, .caretAnchor,
749+
.dollarAnchor, .backreference, .subpattern, .callout,
750+
.backtrackingDirective, .changeMatchingOptions, .invalid:
812751
return nil
813752
}
814753
}
@@ -858,7 +797,7 @@ extension AST.Atom {
858797
case .keyboardMetaControl(let x):
859798
return "\\M-\\C-\(x)"
860799

861-
case .property, .escaped, .any, .startOfLine, .endOfLine,
800+
case .property, .escaped, .dot, .caretAnchor, .dollarAnchor,
862801
.backreference, .subpattern, .namedCharacter, .callout,
863802
.backtrackingDirective, .changeMatchingOptions, .invalid:
864803
return nil
@@ -874,7 +813,7 @@ extension AST.Atom {
874813
// TODO: Are callouts quantifiable?
875814
case .escaped(let esc):
876815
return esc.isQuantifiable
877-
case .startOfLine, .endOfLine:
816+
case .caretAnchor, .dollarAnchor:
878817
return false
879818
default:
880819
return true

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2073,9 +2073,9 @@ extension Parser {
20732073
p.unreachable("Should have lexed a group or group-like atom")
20742074

20752075
// (sometimes) special metacharacters
2076-
case ".": return customCC ? .char(".") : .any
2077-
case "^": return customCC ? .char("^") : .startOfLine
2078-
case "$": return customCC ? .char("$") : .endOfLine
2076+
case ".": return customCC ? .char(".") : .dot
2077+
case "^": return customCC ? .char("^") : .caretAnchor
2078+
case "$": return customCC ? .char("$") : .dollarAnchor
20792079

20802080
// Escaped
20812081
case "\\": return p.expectEscaped().value

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ extension RegexValidator {
221221
) {
222222
switch esc {
223223
case .resetStartOfMatch, .singleDataUnit, .trueAnychar,
224-
// '\N' needs to be emitted using 'emitAny'.
224+
// '\N' needs to be emitted using 'emitDot'.
225225
.notNewline:
226226
error(.unsupported("'\\\(esc.character)'"), at: loc)
227227

@@ -288,7 +288,7 @@ extension RegexValidator {
288288
at: atom.location)
289289
}
290290

291-
case .char, .scalar, .startOfLine, .endOfLine, .any:
291+
case .char, .scalar, .caretAnchor, .dollarAnchor, .dot:
292292
break
293293

294294
case .invalid:

Sources/_RegexParser/Regex/Printing/DumpAST.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -153,9 +153,9 @@ extension AST.Atom {
153153
case .keyboardControl, .keyboardMeta, .keyboardMetaControl:
154154
fatalError("TODO")
155155

156-
case .any: return "."
157-
case .startOfLine: return "^"
158-
case .endOfLine: return "$"
156+
case .dot: return "."
157+
case .caretAnchor: return "^"
158+
case .dollarAnchor: return "$"
159159

160160
case .backreference(let r), .subpattern(let r):
161161
return "\(r._dumpBase)"

Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -237,9 +237,6 @@ extension AST.Atom.Number {
237237

238238
extension AST.Atom {
239239
var _canonicalBase: String {
240-
if let anchor = self.assertionKind {
241-
return anchor.rawValue
242-
}
243240
if let lit = self.literalStringValue {
244241
// FIXME: We may have to re-introduce escapes
245242
// For example, `\.` will come back as "." instead
@@ -248,6 +245,10 @@ extension AST.Atom {
248245
return lit
249246
}
250247
switch self.kind {
248+
case .caretAnchor:
249+
return "^"
250+
case .dollarAnchor:
251+
return "$"
251252
case .escaped(let e):
252253
return "\\\(e.character)"
253254
case .backreference(let br):

0 commit comments

Comments
 (0)