swiftlang · milseman · Apr 11, 2023 · Apr 10, 2023 · natecook1000 · Apr 11, 2023
diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift
@@ -115,8 +115,69 @@ extension Processor {
   }
 }
 
-// MARK: Built-in character class matching
+// MARK: Matching `.`
+extension String {
+  /// Matches `.` at `currentPosition`: returns the index just past the next
+  /// character (or scalar, if `isScalarSemantics`), or `nil` if that element is
+  /// a newline or we are at `endIndex`. Tries the ASCII quick path first.
+  func _matchAnyNonNewline(
+    at currentPosition: String.Index,
+    isScalarSemantics: Bool
+  ) -> String.Index? {
+    guard currentPosition < endIndex else { return nil }
+    if case .definite(let result) = _quickMatchAnyNonNewline(
+      at: currentPosition, isScalarSemantics: isScalarSemantics
+    ) {
+      assert(result == _thoroughMatchAnyNonNewline(
+        at: currentPosition, isScalarSemantics: isScalarSemantics))
+      return result
+    }
+    return _thoroughMatchAnyNonNewline(
+      at: currentPosition, isScalarSemantics: isScalarSemantics)
+  }
+
+  /// ASCII fast path for `.`; `.unknown` means the caller must fall back to
+  /// `_thoroughMatchAnyNonNewline`. Requires `currentPosition < endIndex`.
+  @inline(__always)
+  func _quickMatchAnyNonNewline(
+    at currentPosition: String.Index,
+    isScalarSemantics: Bool
+  ) -> QuickResult<String.Index?> {
+    assert(currentPosition < endIndex)
+    guard let (asciiValue, next, isCRLF) = _quickASCIICharacter(
+      at: currentPosition
+    ) else { return .unknown }
+    switch asciiValue {
+      // `isNewline` also covers VT (0x0B) and FF (0x0C); reject them here too
+      // so the quick path never disagrees with the thorough one.
+      case ._lineFeed, ._lineTab, ._formFeed, ._carriageReturn:
+        return .definite(nil)
+      default:
+        assert(!isCRLF)
+        return .definite(next)
+    }
+  }
+
+  /// Slow path for `.`: consults `isNewline` on the scalar or the `Character`
+  /// at `currentPosition`. Requires `currentPosition < endIndex`.
+  @inline(never)
+  func _thoroughMatchAnyNonNewline(
+    at currentPosition: String.Index,
+    isScalarSemantics: Bool
+  ) -> String.Index? {
+    assert(currentPosition < endIndex)
+    if isScalarSemantics {
+      let scalar = unicodeScalars[currentPosition]
+      guard !scalar.isNewline else { return nil }
+      return unicodeScalars.index(after: currentPosition)
+    }
+    let char = self[currentPosition]
+    guard !char.isNewline else { return nil }
+    return index(after: currentPosition)
+  }
+}
 
+// MARK: - Built-in character class matching
 extension String {
 
   // Mentioned in ProgrammersManual.md, update docs if redesigned

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
@@ -346,23 +346,18 @@ extension Processor {
     return true
   }
 
-  // Matches the next character if it is not a newline
-  mutating func matchAnyNonNewline() -> Bool {
-    guard let c = load(), !c.isNewline else {
-      signalFailure()
-      return false
-    }
-    _uncheckedForcedConsumeOne()
-    return true
-  }
-
-  // Matches the next scalar if it is not a newline
-  mutating func matchAnyNonNewlineScalar() -> Bool {
-    guard let s = loadScalar(), !s.isNewline else {
+  // Advances past the next character/scalar if it is not a newline; otherwise signals failure
+  mutating func matchAnyNonNewline(
+    isScalarSemantics: Bool
+  ) -> Bool {
+    guard let next = input._matchAnyNonNewline(
+      at: currentPosition,
+      isScalarSemantics: isScalarSemantics
+    ) else {
       signalFailure()
       return false
     }
-    input.unicodeScalars.formIndex(after: &currentPosition)
+    currentPosition = next
     return true
   }
 
@@ -535,14 +530,8 @@ extension Processor {
         }
       }
     case .matchAnyNonNewline:
-      if payload.isScalar {
-        if matchAnyNonNewlineScalar() {
-          controller.step()
-        }
-      } else {
-        if matchAnyNonNewline() {
-          controller.step()
-        }
+      if matchAnyNonNewline(isScalarSemantics: payload.isScalar) {
+        controller.step()
       }
     case .match:
       let (isCaseInsensitive, reg) = payload.elementPayload

diff --git a/Sources/_StringProcessing/Unicode/ASCII.swift b/Sources/_StringProcessing/Unicode/ASCII.swift
@@ -9,26 +9,25 @@
 //
 //===----------------------------------------------------------------------===//
 
-private var _lineFeed: UInt8 { 0x0A }
-private var _carriageReturn: UInt8 { 0x0D }
-private var _lineTab: UInt8 { 0x0B }
-private var _formFeed: UInt8 { 0x0C }
-private var _space: UInt8 { 0x20 }
-private var _tab: UInt8 { 0x09 }
+extension UInt8 {
+  // ASCII code-unit constants, listed in ascending order of value.
+  static var _tab: UInt8 { 0x09 }
+  static var _lineFeed: UInt8 { 0x0A }
+  static var _lineTab: UInt8 { 0x0B }
+  static var _formFeed: UInt8 { 0x0C }
+  static var _carriageReturn: UInt8 { 0x0D }
+  static var _space: UInt8 { 0x20 }
+  static var _underscore: UInt8 { 0x5F }
+}
 
 private var _0: UInt8 { 0x30 }
 private var _9: UInt8 { 0x39 }
-private func _isASCIINumber(_ x: UInt8) -> Bool {
-  return (_0..._9).contains(x)
-}
 
 private var _a: UInt8 { 0x61 }
 private var _z: UInt8 { 0x7A }
 private var _A: UInt8 { 0x41 }
 private var _Z: UInt8 { 0x5A }
 
-private var _underscore: UInt8 { 0x5F }
-
 extension UInt8 {
   var _isASCII: Bool { self < 0x80 }
 
@@ -43,14 +42,14 @@ extension UInt8 {
   /// Assuming we're ASCII, whether we match `\h`
   var _asciiIsHorizontalWhitespace: Bool {
     assert(_isASCII)
-    return self == _space || self == _tab
+    return self == ._space || self == ._tab
   }
 
   /// Assuming we're ASCII, whether we match `\v`
   var _asciiIsVerticalWhitespace: Bool {
     assert(_isASCII)
     switch self {
-    case _lineFeed, _carriageReturn, _lineTab, _formFeed:
+    case ._lineFeed, ._carriageReturn, ._lineTab, ._formFeed:
       return true
     default:
       return false
@@ -61,7 +60,7 @@ extension UInt8 {
   var _asciiIsWhitespace: Bool {
     assert(_isASCII)
     switch self {
-    case _space, _tab, _lineFeed, _lineTab, _formFeed, _carriageReturn:
+    case ._space, ._tab, ._lineFeed, ._lineTab, ._formFeed, ._carriageReturn:
       return true
     default:
       return false
@@ -77,11 +76,13 @@ extension UInt8 {
   /// Assuming we're ASCII, whether we match `\w`
   var _asciiIsWord: Bool {
     assert(_isASCII)
-    return _asciiIsDigit || _asciiIsLetter || self == _underscore
+    return _asciiIsDigit || _asciiIsLetter || self == ._underscore
   }
 }
 
 extension String {
+  /// TODO: Take an `isScalarSemantics` parameter so that we can return more results
+  /// and give the right `next` index without requiring the caller to re-adjust it.
   /// TODO: detailed description of nuanced semantics
   func _quickASCIICharacter(
     at idx: Index
@@ -107,7 +108,7 @@ extension String {
     guard tail._isSub300StartingByte else { return nil }
 
     // Handle CR-LF:
-    if base == _carriageReturn && tail == _lineFeed {
+    if base == ._carriageReturn && tail == ._lineFeed {
       utf8.formIndex(after: &next)
       guard next == endIndex || utf8[next]._isSub300StartingByte else {
         return nil
@@ -165,5 +166,6 @@ extension String {
       return (next, asciiValue._asciiIsWord)
     }
   }
+
 }