swiftlang · natecook1000 · Apr 18, 2022 · Apr 18, 2022 · Apr 18, 2022 · Apr 18, 2022
diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift
@@ -14,40 +14,36 @@
 @available(SwiftStdlib 5.7, *)
 extension RegexComponent {
   /// Returns a regular expression that ignores casing when matching.
-  public func ignoringCase(_ ignoreCase: Bool = true) -> Regex<RegexOutput> {
-    wrapInOption(.caseInsensitive, addingIf: ignoreCase)
+  public func ignoresCase(_ ignoresCase: Bool = true) -> Regex<RegexOutput> {
+    wrapInOption(.caseInsensitive, addingIf: ignoresCase)
   }
 
   /// Returns a regular expression that only matches ASCII characters as "word
   /// characters".
-  public func usingASCIIWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
-    wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
+  public func asciiOnlyWordCharacters(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+    wrapInOption(.asciiOnlyWord, addingIf: useASCII)
   }
 
   /// Returns a regular expression that only matches ASCII characters as digits.
-  public func usingASCIIDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+  public func asciiOnlyDigits(_ useASCII: Bool = true) -> Regex<RegexOutput> {
     wrapInOption(.asciiOnlyDigit, addingIf: useASCII)
   }
 
   /// Returns a regular expression that only matches ASCII characters as space
   /// characters.
-  public func usingASCIISpaces(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+  public func asciiOnlyWhitespace(_ useASCII: Bool = true) -> Regex<RegexOutput> {
     wrapInOption(.asciiOnlySpace, addingIf: useASCII)
   }
 
   /// Returns a regular expression that only matches ASCII characters when
   /// matching character classes.
-  public func usingASCIICharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
+  public func asciiOnlyCharacterClasses(_ useASCII: Bool = true) -> Regex<RegexOutput> {
     wrapInOption(.asciiOnlyPOSIXProps, addingIf: useASCII)
   }
 
-  /// Returns a regular expression that uses the Unicode word boundary
-  /// algorithm.
-  ///
-  /// This option is enabled by default; pass `false` to disable use of
-  /// Unicode's word boundary algorithm.
-  public func usingUnicodeWordBoundaries(_ useUnicodeWordBoundaries: Bool = true) -> Regex<RegexOutput> {
-    wrapInOption(.unicodeWordBoundaries, addingIf: useUnicodeWordBoundaries)
+  /// Returns a regular expression that uses the specified word boundary algorithm.
+  public func wordBoundaryKind(_ wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
+    wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2)
   }
 
   /// Returns a regular expression where the start and end of input
@@ -133,6 +129,7 @@ extension RegexComponent {
 }
 
+/// A semantic level to use during regex matching.
 @available(SwiftStdlib 5.7, *)
 public struct RegexSemanticLevel: Hashable {
   internal enum Representation {
     case graphemeCluster
@@ -154,6 +151,38 @@ public struct RegexSemanticLevel: Hashable {
   }
 }
 
+/// A word boundary algorithm to use during regex matching.
+@available(SwiftStdlib 5.7, *)
+public struct RegexWordBoundaryKind: Hashable {
+  internal enum Representation {
+    case unicodeLevel1
+    case unicodeLevel2
+  }
+
+  internal var base: Representation
+
+  /// A word boundary algorithm that implements the "simple word boundary"
+  /// Unicode recommendation.
+  ///
+  /// A simple word boundary is a position in the input between two characters
+  /// that match `/\w\W/` or `/\W\w/`, or between the start or end of the input
+  /// and a `\w` character. Word boundaries therefore depend on the
+  /// option-defined behavior of `\w`.
+  public static var unicodeLevel1: Self {
+    .init(base: .unicodeLevel1)
+  }
+
+  /// A word boundary algorithm that implements the "default word boundary"
+  /// Unicode recommendation.
+  ///
+  /// Default word boundaries use a Unicode algorithm that handles some cases
+  /// better than simple word boundaries, such as words with internal
+  /// punctuation, changes in script, and Emoji.
+  public static var unicodeLevel2: Self {
+    .init(base: .unicodeLevel2)
+  }
+}
+
 // MARK: - Helper method
 
 @available(SwiftStdlib 5.7, *)

diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -228,7 +228,7 @@ class RegexDSLTests: XCTestCase {
       matchType: Substring.self, ==) {
         OneOrMore {
           "abc"
-        }.ignoringCase(true)
+        }.ignoresCase(true)
       }
 
     // Multiple options on one component wrap successively, but do not
@@ -242,8 +242,8 @@ class RegexDSLTests: XCTestCase {
         OneOrMore {
           "abc"
         }
-        .ignoringCase(true)
-        .ignoringCase(false)
+        .ignoresCase(true)
+        .ignoresCase(false)
       }
 
     // An option on an outer component doesn't override an option set on an
@@ -257,12 +257,36 @@ class RegexDSLTests: XCTestCase {
       ("abcdeABCdeaBcde", "abcdeABCdeaBcde"),
       matchType: Substring.self, ==) {
         OneOrMore {
-          "abc".ignoringCase(true)
+          "abc".ignoresCase(true)
           Optionally("de")
         }
-        .ignoringCase(false)
+        .ignoresCase(false)
       }
 
+#if os(macOS)
+    try XCTExpectFailure("Implement level 2 word boundaries") {
+      try _testDSLCaptures(
+        ("can't stop won't stop", ("can't stop won't stop", "can't", "won")),
+        matchType: (Substring, Substring, Substring).self, ==) {
+          Capture {
+            OneOrMore(.word)
+            Anchor.wordBoundary
+          }
+          OneOrMore(.any, .reluctantly)
+          "stop"
+          " "
+
+          Capture {
+            OneOrMore(.word)
+            Anchor.wordBoundary
+          }
+          .wordBoundaryKind(.unicodeLevel1)
+          OneOrMore(.any, .reluctantly)
+          "stop"
+        }
+    }
+#endif
+
     try _testDSLCaptures(
       ("abcdef123", ("abcdef123", "a", "123")),
       matchType: (Substring, Substring, Substring).self, ==) {
@@ -280,6 +304,18 @@ class RegexDSLTests: XCTestCase {
         }
         ZeroOrMore(.digit)
       }
+
+    try _testDSLCaptures(
+      ("abcdefg", ("abcdefg", "abcdefg")),
+      ("abcdéfg", ("abcdéfg", "abcd")),
+      matchType: (Substring, Substring).self, ==) {
+        Capture {
+          OneOrMore(.word)
+        }
+        .asciiOnlyWordCharacters()
+
+        ZeroOrMore(.any)
+      }
   }
 
   func testQuantificationBehavior() throws {

diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
@@ -1337,7 +1337,7 @@ extension RegexTests {
     XCTAssertTrue ("cafe".contains(regex))
     XCTAssertFalse("CaFe".contains(regex))
 
-    let caseInsensitiveRegex = regex.ignoringCase()
+    let caseInsensitiveRegex = regex.ignoresCase()
     XCTAssertTrue("cafe".contains(caseInsensitiveRegex))
     XCTAssertTrue("CaFe".contains(caseInsensitiveRegex))
   }