Skip to content

Nominalize option methods #295

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 43 additions & 14 deletions Sources/_StringProcessing/Regex/Options.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,36 @@
@available(SwiftStdlib 5.7, *)
extension RegexComponent {
/// Returns a regular expression that ignores casing when matching.
public func ignoringCase(_ ignoreCase: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.caseInsensitive, addingIf: ignoreCase)
public func ignoresCase(_ ignoresCase: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.caseInsensitive, addingIf: ignoresCase)
}

/// Returns a regular expression that only matches ASCII characters as "word
/// characters".
public func usingASCIIWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
public func asciiOnlyWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.asciiOnlyWord, addingIf: useASCII)
}

/// Returns a regular expression that only matches ASCII characters as digits.
public func usingASCIIDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
public func asciiOnlyDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
}

/// Returns a regular expression that only matches ASCII characters as space
/// characters.
public func usingASCIISpaces(_ useASCII: Bool = true) -> Regex<RegexOutput> {
public func asciiOnlyWhitespace(_ useASCII: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.asciiOnlySpace, addingIf: useASCII)
}

/// Returns a regular expression that only matches ASCII characters when
/// matching character classes.
public func usingASCIICharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
public func asciiOnlyCharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.asciiOnlyPOSIXProps, addingIf: useASCII)
}

/// Returns a regular expression that uses the Unicode word boundary
/// algorithm.
///
/// This option is enabled by default; pass `false` to disable use of
/// Unicode's word boundary algorithm.
public func usingUnicodeWordBoundaries(_ useUnicodeWordBoundaries: Bool = true) -> Regex<RegexOutput> {
wrapInOption(.unicodeWordBoundaries, addingIf: useUnicodeWordBoundaries)
/// Returns a regular expression that uses the specified word boundary algorithm.
public func wordBoundaryKind(_ wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2)
}

/// Returns a regular expression where the start and end of input
Expand Down Expand Up @@ -133,6 +129,7 @@ extension RegexComponent {
}

@available(SwiftStdlib 5.7, *)
/// A semantic level to use during regex matching.
public struct RegexSemanticLevel: Hashable {
internal enum Representation {
case graphemeCluster
Expand All @@ -154,6 +151,38 @@ public struct RegexSemanticLevel: Hashable {
}
}

/// The algorithm a regex uses to locate word boundaries during matching.
@available(SwiftStdlib 5.7, *)
public struct RegexWordBoundaryKind: Hashable {
  /// The set of word boundary algorithms this type can represent.
  internal enum Representation {
    case unicodeLevel1
    case unicodeLevel2
  }

  /// The algorithm that this value stands for.
  internal var base: Representation

  /// The word boundary algorithm that follows the "simple word boundary"
  /// Unicode recommendation.
  ///
  /// A simple word boundary is a position in the input that lies between two
  /// characters matching `/\w\W/` or `/\W\w/`, or between the start or end of
  /// the input and a `\w` character. As a result, word boundaries depend on
  /// the option-defined behavior of `\w`.
  public static var unicodeLevel1: Self {
    Self(base: .unicodeLevel1)
  }

  /// The word boundary algorithm that follows the "default word boundary"
  /// Unicode recommendation.
  ///
  /// Default word boundaries rely on a Unicode algorithm that handles some
  /// cases better than simple word boundaries do, such as words with internal
  /// punctuation, changes in script, and Emoji.
  public static var unicodeLevel2: Self {
    Self(base: .unicodeLevel2)
  }
}

// MARK: - Helper method

@available(SwiftStdlib 5.7, *)
Expand Down
46 changes: 41 additions & 5 deletions Tests/RegexBuilderTests/RegexDSLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ class RegexDSLTests: XCTestCase {
matchType: Substring.self, ==) {
OneOrMore {
"abc"
}.ignoringCase(true)
}.ignoresCase(true)
}

// Multiple options on one component wrap successively, but do not
Expand All @@ -242,8 +242,8 @@ class RegexDSLTests: XCTestCase {
OneOrMore {
"abc"
}
.ignoringCase(true)
.ignoringCase(false)
.ignoresCase(true)
.ignoresCase(false)
}

// An option on an outer component doesn't override an option set on an
Expand All @@ -257,12 +257,36 @@ class RegexDSLTests: XCTestCase {
("abcdeABCdeaBcde", "abcdeABCdeaBcde"),
matchType: Substring.self, ==) {
OneOrMore {
"abc".ignoringCase(true)
"abc".ignoresCase(true)
Optionally("de")
}
.ignoringCase(false)
.ignoresCase(false)
}

#if os(macOS)
try XCTExpectFailure("Implement level 2 word boundaries") {
try _testDSLCaptures(
("can't stop won't stop", ("can't stop won't stop", "can't", "won")),
matchType: (Substring, Substring, Substring).self, ==) {
Capture {
OneOrMore(.word)
Anchor.wordBoundary
}
OneOrMore(.any, .reluctantly)
"stop"
" "

Capture {
OneOrMore(.word)
Anchor.wordBoundary
}
.wordBoundaryKind(.unicodeLevel1)
OneOrMore(.any, .reluctantly)
"stop"
}
}
#endif

try _testDSLCaptures(
("abcdef123", ("abcdef123", "a", "123")),
matchType: (Substring, Substring, Substring).self, ==) {
Expand All @@ -280,6 +304,18 @@ class RegexDSLTests: XCTestCase {
}
ZeroOrMore(.digit)
}

try _testDSLCaptures(
("abcdefg", ("abcdefg", "abcdefg")),
("abcdéfg", ("abcdéfg", "abcd")),
matchType: (Substring, Substring).self, ==) {
Capture {
OneOrMore(.word)
}
.asciiOnlyWordCharacters()

ZeroOrMore(.any)
}
}

func testQuantificationBehavior() throws {
Expand Down
2 changes: 1 addition & 1 deletion Tests/RegexTests/MatchTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,7 @@ extension RegexTests {
XCTAssertTrue ("cafe".contains(regex))
XCTAssertFalse("CaFe".contains(regex))

let caseInsensitiveRegex = regex.ignoringCase()
let caseInsensitiveRegex = regex.ignoresCase()
XCTAssertTrue("cafe".contains(caseInsensitiveRegex))
XCTAssertTrue("CaFe".contains(caseInsensitiveRegex))
}
Expand Down