Skip to content

Fix a few DSL APIs #580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 15, 2022
32 changes: 23 additions & 9 deletions Sources/RegexBuilder/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,30 @@ public struct Anchor {

@available(SwiftStdlib 5.7, *)
extension Anchor: RegexComponent {
/// The DSL tree assertion that corresponds to this anchor's `kind`.
///
/// In the `DSLTree.Atom.Assertion` form below, only the text-segment and
/// word-boundary arms pick a different result based on `isInverted`; the
/// arms marked FIXME return the same assertion whether or not the anchor
/// is inverted.
var baseAssertion: DSLTree._AST.AssertionKind {
var baseAssertion: DSLTree.Atom.Assertion {
switch kind {
case .startOfSubject: return .startOfSubject(isInverted)
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
case .endOfSubject: return .endOfSubject(isInverted)
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
case .startOfLine: return .startOfLine(isInverted)
case .endOfLine: return .endOfLine(isInverted)
case .wordBoundary: return .wordBoundary(isInverted)
case .startOfSubject:
// FIXME: Inverted? `isInverted` is not consulted in this arm.
return .startOfSubject
case .endOfSubjectBeforeNewline:
// FIXME: Inverted? `isInverted` is not consulted in this arm.
return .endOfSubjectBeforeNewline
case .endOfSubject:
// FIXME: Inverted? `isInverted` is not consulted in this arm.
return .endOfSubject
case .firstMatchingPositionInSubject:
// FIXME: Inverted? `isInverted` is not consulted in this arm.
return .firstMatchingPositionInSubject
case .textSegmentBoundary:
// Inversion is expressed by selecting the dedicated negated case.
return isInverted ? .notTextSegment : .textSegment
case .startOfLine:
// FIXME: Inverted? `isInverted` is not consulted in this arm.
return .startOfLine
case .endOfLine:
// FIXME: Inverted? `isInverted` is not consulted in this arm.
return .endOfLine
case .wordBoundary:
// Inversion is expressed by selecting the dedicated negated case.
return isInverted ? .notWordBoundary : .wordBoundary
}
}

Expand Down
4 changes: 4 additions & 0 deletions Sources/RegexBuilder/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ extension RegexComponent where Self == CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
}

/// A character class built from a custom character class whose only member
/// is the `anyNonNewline` atom.
public static var anyNonNewline: CharacterClass {
  let customClass = DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)])
  return CharacterClass(customClass)
}

/// A character class created from the `_anyGrapheme` value through the
/// `unconverted:` initializer.
public static var anyGraphemeCluster: CharacterClass {
  return CharacterClass(unconverted: ._anyGrapheme)
}
Expand Down
83 changes: 11 additions & 72 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ extension AST {
case namedCharacter(String)

/// .
case any
case dot

/// ^
case startOfLine
case caretAnchor

/// $
case endOfLine
case dollarAnchor

// References
case backreference(Reference)
Expand Down Expand Up @@ -104,9 +104,9 @@ extension AST.Atom {
case .callout(let v): return v
case .backtrackingDirective(let v): return v
case .changeMatchingOptions(let v): return v
case .any: return nil
case .startOfLine: return nil
case .endOfLine: return nil
case .dot: return nil
case .caretAnchor: return nil
case .dollarAnchor: return nil
case .invalid: return nil
}
}
Expand Down Expand Up @@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
}
}

extension AST.Atom {
  /// Anchors and other built-in zero-width assertions. Each case's raw value
  /// is the assertion's spelling in regex syntax.
  public enum AssertionKind: String, Hashable {
    /// `\A`
    case startOfSubject = #"\A"#

    /// `\Z`
    case endOfSubjectBeforeNewline = #"\Z"#

    /// `\z`
    case endOfSubject = #"\z"#

    /// `\K`
    case resetStartOfMatch = #"\K"#

    /// `\G`
    case firstMatchingPositionInSubject = #"\G"#

    /// `\y`
    case textSegment = #"\y"#

    /// `\Y`
    case notTextSegment = #"\Y"#

    /// `^`
    case startOfLine = #"^"#

    /// `$`
    case endOfLine = #"$"#

    /// `\b` (when written outside a custom character class)
    case wordBoundary = #"\b"#

    /// `\B`
    case notWordBoundary = #"\B"#
  }

  /// The assertion this atom represents, or `nil` when the atom's kind does
  /// not map to any `AssertionKind`.
  public var assertionKind: AssertionKind? {
    switch kind {
    case .startOfLine:
      return .startOfLine
    case .endOfLine:
      return .endOfLine

    case .escaped(let builtin):
      // Only the escapes listed here are assertions; every other escape
      // falls through to `nil`.
      switch builtin {
      case .wordBoundary:
        return .wordBoundary
      case .notWordBoundary:
        return .notWordBoundary
      case .startOfSubject:
        return .startOfSubject
      case .endOfSubject:
        return .endOfSubject
      case .textSegment:
        return .textSegment
      case .notTextSegment:
        return .notTextSegment
      case .endOfSubjectBeforeNewline:
        return .endOfSubjectBeforeNewline
      case .firstMatchingPositionInSubject:
        return .firstMatchingPositionInSubject
      case .resetStartOfMatch:
        return .resetStartOfMatch
      default:
        return nil
      }

    default:
      return nil
    }
  }
}

extension AST.Atom {
public enum Callout: Hashable {
/// A PCRE callout written `(?C...)`
Expand Down Expand Up @@ -806,9 +745,9 @@ extension AST.Atom {
// the AST? Or defer for the matching engine?
return nil

case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
.backreference, .subpattern, .callout, .backtrackingDirective,
.changeMatchingOptions, .invalid:
case .scalarSequence, .property, .dot, .caretAnchor,
.dollarAnchor, .backreference, .subpattern, .callout,
.backtrackingDirective, .changeMatchingOptions, .invalid:
return nil
}
}
Expand Down Expand Up @@ -858,7 +797,7 @@ extension AST.Atom {
case .keyboardMetaControl(let x):
return "\\M-\\C-\(x)"

case .property, .escaped, .any, .startOfLine, .endOfLine,
case .property, .escaped, .dot, .caretAnchor, .dollarAnchor,
.backreference, .subpattern, .namedCharacter, .callout,
.backtrackingDirective, .changeMatchingOptions, .invalid:
return nil
Expand All @@ -874,7 +813,7 @@ extension AST.Atom {
// TODO: Are callouts quantifiable?
case .escaped(let esc):
return esc.isQuantifiable
case .startOfLine, .endOfLine:
case .caretAnchor, .dollarAnchor:
return false
default:
return true
Expand Down
6 changes: 3 additions & 3 deletions Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2073,9 +2073,9 @@ extension Parser {
p.unreachable("Should have lexed a group or group-like atom")

// (sometimes) special metacharacters
case ".": return customCC ? .char(".") : .any
case "^": return customCC ? .char("^") : .startOfLine
case "$": return customCC ? .char("$") : .endOfLine
case ".": return customCC ? .char(".") : .dot
case "^": return customCC ? .char("^") : .caretAnchor
case "$": return customCC ? .char("$") : .dollarAnchor

// Escaped
case "\\": return p.expectEscaped().value
Expand Down
4 changes: 2 additions & 2 deletions Sources/_RegexParser/Regex/Parse/Sema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ extension RegexValidator {
) {
switch esc {
case .resetStartOfMatch, .singleDataUnit, .trueAnychar,
// '\N' needs to be emitted using 'emitAny'.
// '\N' needs to be emitted using 'emitDot'.
.notNewline:
error(.unsupported("'\\\(esc.character)'"), at: loc)

Expand Down Expand Up @@ -288,7 +288,7 @@ extension RegexValidator {
at: atom.location)
}

case .char, .scalar, .startOfLine, .endOfLine, .any:
case .char, .scalar, .caretAnchor, .dollarAnchor, .dot:
break

case .invalid:
Expand Down
6 changes: 3 additions & 3 deletions Sources/_RegexParser/Regex/Printing/DumpAST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ extension AST.Atom {
case .keyboardControl, .keyboardMeta, .keyboardMetaControl:
fatalError("TODO")

case .any: return "."
case .startOfLine: return "^"
case .endOfLine: return "$"
case .dot: return "."
case .caretAnchor: return "^"
case .dollarAnchor: return "$"

case .backreference(let r), .subpattern(let r):
return "\(r._dumpBase)"
Expand Down
7 changes: 4 additions & 3 deletions Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ extension AST.Atom.Number {

extension AST.Atom {
var _canonicalBase: String {
if let anchor = self.assertionKind {
return anchor.rawValue
}
if let lit = self.literalStringValue {
// FIXME: We may have to re-introduce escapes
// For example, `\.` will come back as "." instead
Expand All @@ -248,6 +245,10 @@ extension AST.Atom {
return lit
}
switch self.kind {
case .caretAnchor:
return "^"
case .dollarAnchor:
return "$"
case .escaped(let e):
return "\\\(e.character)"
case .backreference(let br):
Expand Down
Loading