-
Notifications
You must be signed in to change notification settings - Fork 49
Move CharacterClass
API into RegexBuilder
#254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cc232c5
ded4689
97c3c6d
cb6ae44
f470eb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This source file is part of the Swift.org open source project | ||
// | ||
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors | ||
// Licensed under Apache License v2.0 with Runtime Library Exception | ||
// | ||
// See https://swift.org/LICENSE.txt for license information | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
import _RegexParser | ||
@_spi(RegexBuilder) import _StringProcessing | ||
|
||
public struct CharacterClass { | ||
internal var ccc: DSLTree.CustomCharacterClass | ||
|
||
init(_ ccc: DSLTree.CustomCharacterClass) { | ||
self.ccc = ccc | ||
} | ||
|
||
init(unconverted model: _CharacterClassModel) { | ||
// FIXME: Implement in DSLTree instead of wrapping an AST atom | ||
switch model.makeAST() { | ||
case .atom(let atom): | ||
self.ccc = .init(members: [.atom(.unconverted(atom))]) | ||
default: | ||
fatalError("Unsupported _CharacterClassModel") | ||
} | ||
} | ||
|
||
init(property: AST.Atom.CharacterProperty) { | ||
// FIXME: Implement in DSLTree instead of wrapping an AST atom | ||
let astAtom = AST.Atom(.property(property), .fake) | ||
self.ccc = .init(members: [.atom(.unconverted(astAtom))]) | ||
} | ||
} | ||
|
||
extension CharacterClass: RegexComponent { | ||
public var regex: Regex<Substring> { | ||
return Regex(node: DSLTree.Node.customCharacterClass(ccc)) | ||
} | ||
} | ||
|
||
extension CharacterClass { | ||
public var inverted: CharacterClass { | ||
CharacterClass(ccc.inverted) | ||
} | ||
} | ||
|
||
extension RegexComponent where Self == CharacterClass { | ||
public static var any: CharacterClass { | ||
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)])) | ||
} | ||
|
||
public static var anyGrapheme: CharacterClass { | ||
.init(unconverted: .anyGrapheme) | ||
} | ||
|
||
public static var whitespace: CharacterClass { | ||
.init(unconverted: .whitespace) | ||
} | ||
|
||
public static var digit: CharacterClass { | ||
.init(unconverted: .digit) | ||
} | ||
|
||
public static var hexDigit: CharacterClass { | ||
.init(DSLTree.CustomCharacterClass(members: [ | ||
.range(.char("A"), .char("F")), | ||
.range(.char("a"), .char("f")), | ||
.range(.char("0"), .char("9")), | ||
])) | ||
} | ||
|
||
public static var horizontalWhitespace: CharacterClass { | ||
.init(unconverted: .horizontalWhitespace) | ||
} | ||
|
||
public static var newlineSequence: CharacterClass { | ||
.init(unconverted: .newlineSequence) | ||
} | ||
|
||
public static var verticalWhitespace: CharacterClass { | ||
.init(unconverted: .verticalWhitespace) | ||
} | ||
|
||
public static var word: CharacterClass { | ||
.init(unconverted: .word) | ||
} | ||
} | ||
|
||
extension RegexComponent where Self == CharacterClass { | ||
/// Returns a character class that matches any character in the given string | ||
/// or sequence. | ||
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass | ||
where S.Element == Character | ||
{ | ||
CharacterClass(DSLTree.CustomCharacterClass( | ||
members: s.map { .atom(.char($0)) })) | ||
} | ||
|
||
/// Returns a character class that matches any unicode scalar in the given | ||
/// sequence. | ||
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass | ||
where S.Element == UnicodeScalar | ||
{ | ||
CharacterClass(DSLTree.CustomCharacterClass( | ||
members: s.map { .atom(.scalar($0)) })) | ||
} | ||
} | ||
|
||
// Unicode properties | ||
extension CharacterClass { | ||
public static func generalCategory(_ category: Unicode.GeneralCategory) -> CharacterClass { | ||
guard let extendedCategory = category.extendedGeneralCategory else { | ||
fatalError("Unexpected general category") | ||
} | ||
return CharacterClass(property: | ||
.init(.generalCategory(extendedCategory), isInverted: false, isPOSIX: false)) | ||
} | ||
} | ||
|
||
/// Range syntax for characters in `CharacterClass`es. | ||
public func ...(lhs: Character, rhs: Character) -> CharacterClass { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be useful to define these operators as static methods on extensions? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think(?) we get any benefit from making them static methods — that syntax is to support implementation of protocol requirements, which isn't the case here, and all operator functions end up in the global namespace in any case. |
||
let range: DSLTree.CustomCharacterClass.Member = .range(.char(lhs), .char(rhs)) | ||
let ccc = DSLTree.CustomCharacterClass(members: [range], isInverted: false) | ||
return CharacterClass(ccc) | ||
} | ||
|
||
/// Range syntax for unicode scalars in `CharacterClass`es. | ||
@_disfavoredOverload | ||
public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass { | ||
let range: DSLTree.CustomCharacterClass.Member = .range(.scalar(lhs), .scalar(rhs)) | ||
let ccc = DSLTree.CustomCharacterClass(members: [range], isInverted: false) | ||
return CharacterClass(ccc) | ||
} | ||
|
||
extension Unicode.GeneralCategory { | ||
var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? { | ||
switch self { | ||
case .uppercaseLetter: return .uppercaseLetter | ||
case .lowercaseLetter: return .lowercaseLetter | ||
case .titlecaseLetter: return .titlecaseLetter | ||
case .modifierLetter: return .modifierLetter | ||
case .otherLetter: return .otherLetter | ||
case .nonspacingMark: return .nonspacingMark | ||
case .spacingMark: return .spacingMark | ||
case .enclosingMark: return .enclosingMark | ||
case .decimalNumber: return .decimalNumber | ||
case .letterNumber: return .letterNumber | ||
case .otherNumber: return .otherNumber | ||
case .connectorPunctuation: return .connectorPunctuation | ||
case .dashPunctuation: return .dashPunctuation | ||
case .openPunctuation: return .openPunctuation | ||
case .closePunctuation: return .closePunctuation | ||
case .initialPunctuation: return .initialPunctuation | ||
case .finalPunctuation: return .finalPunctuation | ||
case .otherPunctuation: return .otherPunctuation | ||
case .mathSymbol: return .mathSymbol | ||
case .currencySymbol: return .currencySymbol | ||
case .modifierSymbol: return .modifierSymbol | ||
case .otherSymbol: return .otherSymbol | ||
case .spaceSeparator: return .spaceSeparator | ||
case .lineSeparator: return .lineSeparator | ||
case .paragraphSeparator: return .paragraphSeparator | ||
case .control: return .control | ||
case .format: return .format | ||
case .surrogate: return .surrogate | ||
case .privateUse: return .privateUse | ||
case .unassigned: return .unassigned | ||
@unknown default: return nil | ||
} | ||
} | ||
} | ||
|
||
// MARK: - Set algebra methods | ||
|
||
extension RegexComponent where Self == CharacterClass { | ||
public init(_ first: CharacterClass, _ rest: CharacterClass...) { | ||
if rest.isEmpty { | ||
self.init(first.ccc) | ||
} else { | ||
let members: [DSLTree.CustomCharacterClass.Member] = | ||
(CollectionOfOne(first) + rest).map { .custom($0.ccc) } | ||
self.init(.init(members: members)) | ||
} | ||
} | ||
} | ||
|
||
extension CharacterClass { | ||
public func union(_ other: CharacterClass) -> CharacterClass { | ||
CharacterClass(.init(members: [ | ||
.custom(self.ccc), | ||
.custom(other.ccc)])) | ||
} | ||
|
||
public func intersection(_ other: CharacterClass) -> CharacterClass { | ||
CharacterClass(.init(members: [ | ||
.intersection(self.ccc, other.ccc) | ||
])) | ||
} | ||
|
||
public func subtracting(_ other: CharacterClass) -> CharacterClass { | ||
CharacterClass(.init(members: [ | ||
.subtraction(self.ccc, other.ccc) | ||
])) | ||
} | ||
|
||
public func symmetricDifference(_ other: CharacterClass) -> CharacterClass { | ||
CharacterClass(.init(members: [ | ||
.symmetricDifference(self.ccc, other.ccc) | ||
])) | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.