swiftlang · natecook1000 · May 9, 2022 · May 6, 2022 · May 6, 2022 · May 9, 2022
diff --git a/Sources/_StringProcessing/Unicode/ScalarProps.swift b/Sources/_StringProcessing/Unicode/ScalarProps.swift
@@ -46,3 +46,19 @@ extension Unicode.Script {
     return result
   }
 }
+
+extension UnicodeScalar { // Whitespace classification helpers for a single scalar.
+  var isHorizontalWhitespace: Bool { // TAB (U+0009) or any scalar whose general category is Zs.
+    value == 0x09 || properties.generalCategory == .spaceSeparator
+  }
+
+  var isVerticalWhitespace: Bool { // True only for the line-breaking scalars listed below.
+    switch value {
+      case 0x000A...0x000D /* LF ... CR */: return true // range also covers VT (U+000B) and FF (U+000C)
+      case 0x0085 /* NEXT LINE (NEL) */: return true
+      case 0x2028 /* LINE SEPARATOR */: return true
+      case 0x2029 /* PARAGRAPH SEPARATOR */: return true
+      default: return false // every other scalar, including TAB and the Zs spaces
+    }
+  }
+}
diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift
@@ -178,15 +178,18 @@ public struct _CharacterClassModel: Hashable {
         matched = c.isNumber && (c.isASCII || !options.usesASCIIDigits)
       case .hexDigit:
         matched = c.isHexDigit && (c.isASCII || !options.usesASCIIDigits)
-      case .horizontalWhitespace: fatalError("Not implemented")
-      case .newlineSequence:
-        matched = c.isNewline && (c.isASCII || !options.usesASCIISpaces)
-      case .verticalWhitespace: fatalError("Not implemented")
+      case .horizontalWhitespace:
+        matched = c.unicodeScalars.first?.isHorizontalWhitespace == true
+          && (c.isASCII || !options.usesASCIISpaces)
+      case .newlineSequence, .verticalWhitespace:
+        matched = c.unicodeScalars.first?.isVerticalWhitespace == true
+          && (c.isASCII || !options.usesASCIISpaces)
       case .whitespace:
         matched = c.isWhitespace && (c.isASCII || !options.usesASCIISpaces)
       case .word:
         matched = c.isWordCharacter && (c.isASCII || !options.usesASCIIWord)
-      case .custom(let set): matched = set.any { $0.matches(c, with: options) }
+      case .custom(let set):
+        matched = set.any { $0.matches(c, with: options) }
       }
       if isInverted {
         matched.toggle()
@@ -206,14 +209,21 @@ public struct _CharacterClassModel: Hashable {
         matched = c.properties.numericType != nil && (c.isASCII || !options.usesASCIIDigits)
       case .hexDigit:
         matched = Character(c).isHexDigit && (c.isASCII || !options.usesASCIIDigits)
-      case .horizontalWhitespace: fatalError("Not implemented")
-      case .newlineSequence: fatalError("Not implemented")
-      case .verticalWhitespace: fatalError("Not implemented")
+      case .horizontalWhitespace:
+        matched = c.isHorizontalWhitespace && (c.isASCII || !options.usesASCIISpaces)
+      case .verticalWhitespace:
+        matched = c.isVerticalWhitespace && (c.isASCII || !options.usesASCIISpaces)
+      case .newlineSequence:
+        matched = c.isVerticalWhitespace && (c.isASCII || !options.usesASCIISpaces)
+        if c == "\r" && nextIndex != str.endIndex && str.unicodeScalars[nextIndex] == "\n" {
+          str.unicodeScalars.formIndex(after: &nextIndex)
+        }
       case .whitespace:
         matched = c.properties.isWhitespace && (c.isASCII || !options.usesASCIISpaces)
       case .word:
         matched = (c.properties.isAlphabetic || c == "_") && (c.isASCII || !options.usesASCIIWord)
-      case .custom: fatalError("Not supported")
+      case .custom(let set):
+        matched = set.any { $0.matches(Character(c), with: options) }
       }
       if isInverted {
         matched.toggle()

diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift
@@ -268,7 +268,10 @@ extension UTS18Tests {
     var lines = lineInput.matches(of: regex(#"\d{2}"#))
     XCTAssertEqual(lines.count, 11)
     // Test \R - newline sequence
-    lines = lineInput.matches(of: regex(#"\d{2}\R"#))
+    lines = lineInput.matches(of: regex(#"\d{2}\R^"#).anchorsMatchLineEndings())
+    XCTAssertEqual(lines.count, 11)
+    // Test \v - vertical space
+    lines = lineInput.matches(of: regex(#"\d{2}\v^"#).anchorsMatchLineEndings())
     XCTAssertEqual(lines.count, 11)
     // Test anchors as line boundaries
     lines = lineInput.matches(of: regex(#"^\d{2}$"#).anchorsMatchLineEndings())
@@ -277,6 +280,15 @@ extension UTS18Tests {
     lines = lineInput.matches(of: regex(#".+"#))
     XCTAssertEqual(lines.count, 11)
 
+    // Unicode scalar semantics - \R still matches all, including \r\n sequence
+    lines = lineInput.matches(
+      of: regex(#"\d{2}\R^"#).matchingSemantics(.unicodeScalar).anchorsMatchLineEndings())
+    XCTAssertEqual(lines.count, 11)
+    // Unicode scalar semantics - \v matches all except for \r\n sequence
+    lines = lineInput.matches(
+      of: regex(#"\d{2}\v^"#).matchingSemantics(.unicodeScalar).anchorsMatchLineEndings())
+    XCTAssertEqual(lines.count, 10)
+
     // Does not contain an empty line
     XCTAssertFalse(lineInput.contains(regex(#"^$"#)))
     // Does contain an empty line (between \n and \r, which are reversed here)