Skip to content

Commit 1f2ae04

Browse files
authored
Merge pull request #580 from hamishknight/character-work
Fix a few DSL APIs
2 parents 991d90c + 9456c54 commit 1f2ae04

18 files changed

+599
-209
lines changed

Sources/RegexBuilder/Anchor.swift

Lines changed: 23 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -37,16 +37,30 @@ public struct Anchor {
3737

3838
@available(SwiftStdlib 5.7, *)
3939
extension Anchor: RegexComponent {
40-
var baseAssertion: DSLTree._AST.AssertionKind {
40+
var baseAssertion: DSLTree.Atom.Assertion {
4141
switch kind {
42-
case .startOfSubject: return .startOfSubject(isInverted)
43-
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
44-
case .endOfSubject: return .endOfSubject(isInverted)
45-
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
46-
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
47-
case .startOfLine: return .startOfLine(isInverted)
48-
case .endOfLine: return .endOfLine(isInverted)
49-
case .wordBoundary: return .wordBoundary(isInverted)
42+
case .startOfSubject:
43+
// FIXME: Inverted?
44+
return .startOfSubject
45+
case .endOfSubjectBeforeNewline:
46+
// FIXME: Inverted?
47+
return .endOfSubjectBeforeNewline
48+
case .endOfSubject:
49+
// FIXME: Inverted?
50+
return .endOfSubject
51+
case .firstMatchingPositionInSubject:
52+
// FIXME: Inverted?
53+
return .firstMatchingPositionInSubject
54+
case .textSegmentBoundary:
55+
return isInverted ? .notTextSegment : .textSegment
56+
case .startOfLine:
57+
// FIXME: Inverted?
58+
return .startOfLine
59+
case .endOfLine:
60+
// FIXME: Inverted?
61+
return .endOfLine
62+
case .wordBoundary:
63+
return isInverted ? .notWordBoundary : .wordBoundary
5064
}
5165
}
5266

Sources/RegexBuilder/CharacterClass.swift

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,10 @@ extension RegexComponent where Self == CharacterClass {
4545
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
4646
}
4747

48+
public static var anyNonNewline: CharacterClass {
49+
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
50+
}
51+
4852
public static var anyGraphemeCluster: CharacterClass {
4953
.init(unconverted: ._anyGrapheme)
5054
}

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 11 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -60,13 +60,13 @@ extension AST {
6060
case namedCharacter(String)
6161

6262
/// .
63-
case any
63+
case dot
6464

6565
/// ^
66-
case startOfLine
66+
case caretAnchor
6767

6868
/// $
69-
case endOfLine
69+
case dollarAnchor
7070

7171
// References
7272
case backreference(Reference)
@@ -104,9 +104,9 @@ extension AST.Atom {
104104
case .callout(let v): return v
105105
case .backtrackingDirective(let v): return v
106106
case .changeMatchingOptions(let v): return v
107-
case .any: return nil
108-
case .startOfLine: return nil
109-
case .endOfLine: return nil
107+
case .dot: return nil
108+
case .caretAnchor: return nil
109+
case .dollarAnchor: return nil
110110
case .invalid: return nil
111111
}
112112
}
@@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
511511
}
512512
}
513513

514-
extension AST.Atom {
515-
/// Anchors and other built-in zero-width assertions.
516-
public enum AssertionKind: String, Hashable {
517-
/// \A
518-
case startOfSubject = #"\A"#
519-
520-
/// \Z
521-
case endOfSubjectBeforeNewline = #"\Z"#
522-
523-
/// \z
524-
case endOfSubject = #"\z"#
525-
526-
/// \K
527-
case resetStartOfMatch = #"\K"#
528-
529-
/// \G
530-
case firstMatchingPositionInSubject = #"\G"#
531-
532-
/// \y
533-
case textSegment = #"\y"#
534-
535-
/// \Y
536-
case notTextSegment = #"\Y"#
537-
538-
/// ^
539-
case startOfLine = #"^"#
540-
541-
/// $
542-
case endOfLine = #"$"#
543-
544-
/// \b (from outside a custom character class)
545-
case wordBoundary = #"\b"#
546-
547-
/// \B
548-
case notWordBoundary = #"\B"#
549-
550-
}
551-
552-
public var assertionKind: AssertionKind? {
553-
switch kind {
554-
case .startOfLine: return .startOfLine
555-
case .endOfLine: return .endOfLine
556-
557-
case .escaped(.wordBoundary): return .wordBoundary
558-
case .escaped(.notWordBoundary): return .notWordBoundary
559-
case .escaped(.startOfSubject): return .startOfSubject
560-
case .escaped(.endOfSubject): return .endOfSubject
561-
case .escaped(.textSegment): return .textSegment
562-
case .escaped(.notTextSegment): return .notTextSegment
563-
case .escaped(.endOfSubjectBeforeNewline):
564-
return .endOfSubjectBeforeNewline
565-
case .escaped(.firstMatchingPositionInSubject):
566-
return .firstMatchingPositionInSubject
567-
568-
case .escaped(.resetStartOfMatch): return .resetStartOfMatch
569-
570-
default: return nil
571-
}
572-
}
573-
}
574-
575514
extension AST.Atom {
576515
public enum Callout: Hashable {
577516
/// A PCRE callout written `(?C...)`
@@ -806,9 +745,9 @@ extension AST.Atom {
806745
// the AST? Or defer for the matching engine?
807746
return nil
808747

809-
case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
810-
.backreference, .subpattern, .callout, .backtrackingDirective,
811-
.changeMatchingOptions, .invalid:
748+
case .scalarSequence, .property, .dot, .caretAnchor,
749+
.dollarAnchor, .backreference, .subpattern, .callout,
750+
.backtrackingDirective, .changeMatchingOptions, .invalid:
812751
return nil
813752
}
814753
}
@@ -858,7 +797,7 @@ extension AST.Atom {
858797
case .keyboardMetaControl(let x):
859798
return "\\M-\\C-\(x)"
860799

861-
case .property, .escaped, .any, .startOfLine, .endOfLine,
800+
case .property, .escaped, .dot, .caretAnchor, .dollarAnchor,
862801
.backreference, .subpattern, .namedCharacter, .callout,
863802
.backtrackingDirective, .changeMatchingOptions, .invalid:
864803
return nil
@@ -874,7 +813,7 @@ extension AST.Atom {
874813
// TODO: Are callouts quantifiable?
875814
case .escaped(let esc):
876815
return esc.isQuantifiable
877-
case .startOfLine, .endOfLine:
816+
case .caretAnchor, .dollarAnchor:
878817
return false
879818
default:
880819
return true

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2073,9 +2073,9 @@ extension Parser {
20732073
p.unreachable("Should have lexed a group or group-like atom")
20742074

20752075
// (sometimes) special metacharacters
2076-
case ".": return customCC ? .char(".") : .any
2077-
case "^": return customCC ? .char("^") : .startOfLine
2078-
case "$": return customCC ? .char("$") : .endOfLine
2076+
case ".": return customCC ? .char(".") : .dot
2077+
case "^": return customCC ? .char("^") : .caretAnchor
2078+
case "$": return customCC ? .char("$") : .dollarAnchor
20792079

20802080
// Escaped
20812081
case "\\": return p.expectEscaped().value

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ extension RegexValidator {
221221
) {
222222
switch esc {
223223
case .resetStartOfMatch, .singleDataUnit, .trueAnychar,
224-
// '\N' needs to be emitted using 'emitAny'.
224+
// '\N' needs to be emitted using 'emitDot'.
225225
.notNewline:
226226
error(.unsupported("'\\\(esc.character)'"), at: loc)
227227

@@ -288,7 +288,7 @@ extension RegexValidator {
288288
at: atom.location)
289289
}
290290

291-
case .char, .scalar, .startOfLine, .endOfLine, .any:
291+
case .char, .scalar, .caretAnchor, .dollarAnchor, .dot:
292292
break
293293

294294
case .invalid:

Sources/_RegexParser/Regex/Printing/DumpAST.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -153,9 +153,9 @@ extension AST.Atom {
153153
case .keyboardControl, .keyboardMeta, .keyboardMetaControl:
154154
fatalError("TODO")
155155

156-
case .any: return "."
157-
case .startOfLine: return "^"
158-
case .endOfLine: return "$"
156+
case .dot: return "."
157+
case .caretAnchor: return "^"
158+
case .dollarAnchor: return "$"
159159

160160
case .backreference(let r), .subpattern(let r):
161161
return "\(r._dumpBase)"

Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -237,9 +237,6 @@ extension AST.Atom.Number {
237237

238238
extension AST.Atom {
239239
var _canonicalBase: String {
240-
if let anchor = self.assertionKind {
241-
return anchor.rawValue
242-
}
243240
if let lit = self.literalStringValue {
244241
// FIXME: We may have to re-introduce escapes
245242
// For example, `\.` will come back as "." instead
@@ -248,6 +245,10 @@ extension AST.Atom {
248245
return lit
249246
}
250247
switch self.kind {
248+
case .caretAnchor:
249+
return "^"
250+
case .dollarAnchor:
251+
return "$"
251252
case .escaped(let e):
252253
return "\\\(e.character)"
253254
case .backreference(let br):

0 commit comments

Comments
 (0)