Skip to content

[5.7] Fix a few DSL APIs #583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 23 additions & 9 deletions Sources/RegexBuilder/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,30 @@ public struct Anchor {

@available(SwiftStdlib 5.7, *)
extension Anchor: RegexComponent {
var baseAssertion: DSLTree._AST.AssertionKind {
var baseAssertion: DSLTree.Atom.Assertion {
switch kind {
case .startOfSubject: return .startOfSubject(isInverted)
case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
case .endOfSubject: return .endOfSubject(isInverted)
case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
case .startOfLine: return .startOfLine(isInverted)
case .endOfLine: return .endOfLine(isInverted)
case .wordBoundary: return .wordBoundary(isInverted)
case .startOfSubject:
// FIXME: Inverted?
return .startOfSubject
case .endOfSubjectBeforeNewline:
// FIXME: Inverted?
return .endOfSubjectBeforeNewline
case .endOfSubject:
// FIXME: Inverted?
return .endOfSubject
case .firstMatchingPositionInSubject:
// FIXME: Inverted?
return .firstMatchingPositionInSubject
case .textSegmentBoundary:
return isInverted ? .notTextSegment : .textSegment
case .startOfLine:
// FIXME: Inverted?
return .startOfLine
case .endOfLine:
// FIXME: Inverted?
return .endOfLine
case .wordBoundary:
return isInverted ? .notWordBoundary : .wordBoundary
}
}

Expand Down
25 changes: 13 additions & 12 deletions Sources/RegexBuilder/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,8 @@ public struct CharacterClass {
self.ccc = ccc
}

init(unconverted model: _CharacterClassModel) {
guard let ccc = model.makeDSLTreeCharacterClass() else {
fatalError("Unsupported character class")
}
self.ccc = ccc
init(unconverted atom: DSLTree._AST.Atom) {
self.ccc = .init(members: [.atom(.unconverted(atom))])
}
}

Expand All @@ -48,16 +45,20 @@ extension RegexComponent where Self == CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
}

/// A character class wrapping a `DSLTree.CustomCharacterClass` whose only
/// member is the `.anyNonNewline` atom.
// NOTE(review): presumably matches any single character except a newline;
// the exact newline definition is decided elsewhere — confirm.
public static var anyNonNewline: CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
}

public static var anyGraphemeCluster: CharacterClass {
.init(unconverted: .anyGrapheme)
.init(unconverted: ._anyGrapheme)
}

public static var whitespace: CharacterClass {
.init(unconverted: .whitespace)
.init(unconverted: ._whitespace)
}

public static var digit: CharacterClass {
.init(unconverted: .digit)
.init(unconverted: ._digit)
}

public static var hexDigit: CharacterClass {
Expand All @@ -69,19 +70,19 @@ extension RegexComponent where Self == CharacterClass {
}

public static var horizontalWhitespace: CharacterClass {
.init(unconverted: .horizontalWhitespace)
.init(unconverted: ._horizontalWhitespace)
}

public static var newlineSequence: CharacterClass {
.init(unconverted: .newlineSequence)
.init(unconverted: ._newlineSequence)
}

public static var verticalWhitespace: CharacterClass {
.init(unconverted: .verticalWhitespace)
.init(unconverted: ._verticalWhitespace)
}

public static var word: CharacterClass {
.init(unconverted: .word)
.init(unconverted: ._word)
}
}

Expand Down
83 changes: 11 additions & 72 deletions Sources/_RegexParser/Regex/AST/Atom.swift
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ extension AST {
case namedCharacter(String)

/// .
case any
case dot

/// ^
case startOfLine
case caretAnchor

/// $
case endOfLine
case dollarAnchor

// References
case backreference(Reference)
Expand Down Expand Up @@ -104,9 +104,9 @@ extension AST.Atom {
case .callout(let v): return v
case .backtrackingDirective(let v): return v
case .changeMatchingOptions(let v): return v
case .any: return nil
case .startOfLine: return nil
case .endOfLine: return nil
case .dot: return nil
case .caretAnchor: return nil
case .dollarAnchor: return nil
case .invalid: return nil
}
}
Expand Down Expand Up @@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
}
}

extension AST.Atom {
/// Anchors and other built-in zero-width assertions.
public enum AssertionKind: String, Hashable {
/// \A
case startOfSubject = #"\A"#

/// \Z
case endOfSubjectBeforeNewline = #"\Z"#

/// \z
case endOfSubject = #"\z"#

/// \K
case resetStartOfMatch = #"\K"#

/// \G
case firstMatchingPositionInSubject = #"\G"#

/// \y
case textSegment = #"\y"#

/// \Y
case notTextSegment = #"\Y"#

/// ^
case startOfLine = #"^"#

/// $
case endOfLine = #"$"#

/// \b (from outside a custom character class)
case wordBoundary = #"\b"#

/// \B
case notWordBoundary = #"\B"#

}

public var assertionKind: AssertionKind? {
switch kind {
case .startOfLine: return .startOfLine
case .endOfLine: return .endOfLine

case .escaped(.wordBoundary): return .wordBoundary
case .escaped(.notWordBoundary): return .notWordBoundary
case .escaped(.startOfSubject): return .startOfSubject
case .escaped(.endOfSubject): return .endOfSubject
case .escaped(.textSegment): return .textSegment
case .escaped(.notTextSegment): return .notTextSegment
case .escaped(.endOfSubjectBeforeNewline):
return .endOfSubjectBeforeNewline
case .escaped(.firstMatchingPositionInSubject):
return .firstMatchingPositionInSubject

case .escaped(.resetStartOfMatch): return .resetStartOfMatch

default: return nil
}
}
}

extension AST.Atom {
public enum Callout: Hashable {
/// A PCRE callout written `(?C...)`
Expand Down Expand Up @@ -806,9 +745,9 @@ extension AST.Atom {
// the AST? Or defer for the matching engine?
return nil

case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
.backreference, .subpattern, .callout, .backtrackingDirective,
.changeMatchingOptions, .invalid:
case .scalarSequence, .property, .dot, .caretAnchor,
.dollarAnchor, .backreference, .subpattern, .callout,
.backtrackingDirective, .changeMatchingOptions, .invalid:
return nil
}
}
Expand Down Expand Up @@ -858,7 +797,7 @@ extension AST.Atom {
case .keyboardMetaControl(let x):
return "\\M-\\C-\(x)"

case .property, .escaped, .any, .startOfLine, .endOfLine,
case .property, .escaped, .dot, .caretAnchor, .dollarAnchor,
.backreference, .subpattern, .namedCharacter, .callout,
.backtrackingDirective, .changeMatchingOptions, .invalid:
return nil
Expand All @@ -874,7 +813,7 @@ extension AST.Atom {
// TODO: Are callouts quantifiable?
case .escaped(let esc):
return esc.isQuantifiable
case .startOfLine, .endOfLine:
case .caretAnchor, .dollarAnchor:
return false
default:
return true
Expand Down
6 changes: 3 additions & 3 deletions Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2073,9 +2073,9 @@ extension Parser {
p.unreachable("Should have lexed a group or group-like atom")

// (sometimes) special metacharacters
case ".": return customCC ? .char(".") : .any
case "^": return customCC ? .char("^") : .startOfLine
case "$": return customCC ? .char("$") : .endOfLine
case ".": return customCC ? .char(".") : .dot
case "^": return customCC ? .char("^") : .caretAnchor
case "$": return customCC ? .char("$") : .dollarAnchor

// Escaped
case "\\": return p.expectEscaped().value
Expand Down
4 changes: 2 additions & 2 deletions Sources/_RegexParser/Regex/Parse/Sema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ extension RegexValidator {
) {
switch esc {
case .resetStartOfMatch, .singleDataUnit, .trueAnychar,
// '\N' needs to be emitted using 'emitAny'.
// '\N' needs to be emitted using 'emitDot'.
.notNewline:
error(.unsupported("'\\\(esc.character)'"), at: loc)

Expand Down Expand Up @@ -288,7 +288,7 @@ extension RegexValidator {
at: atom.location)
}

case .char, .scalar, .startOfLine, .endOfLine, .any:
case .char, .scalar, .caretAnchor, .dollarAnchor, .dot:
break

case .invalid:
Expand Down
6 changes: 3 additions & 3 deletions Sources/_RegexParser/Regex/Printing/DumpAST.swift
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ extension AST.Atom {
case .keyboardControl, .keyboardMeta, .keyboardMetaControl:
fatalError("TODO")

case .any: return "."
case .startOfLine: return "^"
case .endOfLine: return "$"
case .dot: return "."
case .caretAnchor: return "^"
case .dollarAnchor: return "$"

case .backreference(let r), .subpattern(let r):
return "\(r._dumpBase)"
Expand Down
7 changes: 4 additions & 3 deletions Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,6 @@ extension AST.Atom.Number {

extension AST.Atom {
var _canonicalBase: String {
if let anchor = self.assertionKind {
return anchor.rawValue
}
if let lit = self.literalStringValue {
// FIXME: We may have to re-introduce escapes
// For example, `\.` will come back as "." instead
Expand All @@ -248,6 +245,10 @@ extension AST.Atom {
return lit
}
switch self.kind {
case .caretAnchor:
return "^"
case .dollarAnchor:
return "$"
case .escaped(let e):
return "\\\(e.character)"
case .backreference(let br):
Expand Down
Loading