swiftlang · natecook1000 · Jun 22, 2022 · Jun 17, 2022 · Jun 17, 2022 · Jun 17, 2022
diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Contains.swift
@@ -74,6 +74,6 @@ extension BidirectionalCollection where SubSequence == Substring {
   @_disfavoredOverload
   @available(SwiftStdlib 5.7, *)
   public func contains(_ regex: some RegexComponent) -> Bool {
-    _contains(RegexConsumer(regex))
+    (try? regex.regex.firstMatch(in: self[...])) != nil
   }
 }
diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -97,25 +97,25 @@ fileprivate extension Compiler.ByteCodeGen {
     switch kind {
     case .startOfSubject:
       builder.buildAssert { (input, pos, bounds) in
-        pos == input.startIndex
+        pos == bounds.lowerBound
       }
 
     case .endOfSubjectBeforeNewline:
       builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-        if pos == input.endIndex { return true }
+        if pos == bounds.upperBound { return true }
         switch semanticLevel {
         case .graphemeCluster:
-          return input.index(after: pos) == input.endIndex
+          return input.index(after: pos) == bounds.upperBound
            && input[pos].isNewline
         case .unicodeScalar:
-          return input.unicodeScalars.index(after: pos) == input.endIndex
+          return input.unicodeScalars.index(after: pos) == bounds.upperBound
            && input.unicodeScalars[pos].isNewline
         }
       }
 
     case .endOfSubject:
       builder.buildAssert { (input, pos, bounds) in
-        pos == input.endIndex
+        pos == bounds.upperBound
       }
 
     case .resetStartOfMatch:
@@ -124,9 +124,10 @@ fileprivate extension Compiler.ByteCodeGen {
 
     case .firstMatchingPositionInSubject:
       // TODO: We can probably build a nice model with API here
-      builder.buildAssert { (input, pos, bounds) in
-        pos == bounds.lowerBound
-      }
+
+      // FIXME: This needs to be based on `searchBounds`,
+      // not the `subjectBounds` given as an argument here
+      builder.buildAssert { (input, pos, bounds) in false }
 
     case .textSegment:
       builder.buildAssert { (input, pos, _) in
@@ -141,9 +142,10 @@ fileprivate extension Compiler.ByteCodeGen {
       }
 
     case .startOfLine:
+      // FIXME: Anchor.startOfLine must always use this first branch
       if options.anchorsMatchNewlines {
         builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-          if pos == input.startIndex { return true }
+          if pos == bounds.lowerBound { return true }
           switch semanticLevel {
           case .graphemeCluster:
             return input[input.index(before: pos)].isNewline
@@ -153,14 +155,15 @@ fileprivate extension Compiler.ByteCodeGen {
         }
       } else {
         builder.buildAssert { (input, pos, bounds) in
-          pos == input.startIndex
+          pos == bounds.lowerBound
         }
       }
 
     case .endOfLine:
+      // FIXME: Anchor.endOfLine must always use this first branch
       if options.anchorsMatchNewlines {
         builder.buildAssert { [semanticLevel = options.semanticLevel] (input, pos, bounds) in
-          if pos == input.endIndex { return true }
+          if pos == bounds.upperBound { return true }
           switch semanticLevel {
           case .graphemeCluster:
             return input[pos].isNewline
@@ -170,7 +173,7 @@ fileprivate extension Compiler.ByteCodeGen {
         }
       } else {
         builder.buildAssert { (input, pos, bounds) in
-          pos == input.endIndex
+          pos == bounds.upperBound
         }
       }
 

diff --git a/Sources/_StringProcessing/Engine/Consume.swift b/Sources/_StringProcessing/Engine/Consume.swift
@@ -18,10 +18,25 @@ extension Engine {
     Processor(
       program: program,
       input: input,
-      bounds: bounds,
+      subjectBounds: bounds,
+      searchBounds: bounds,
       matchMode: matchMode,
       isTracingEnabled: enableTracing)
   }
+
+  func makeFirstMatchProcessor(
+    input: Input,
+    subjectBounds: Range<Input.Index>,
+    searchBounds: Range<Input.Index>
+  ) -> Processor<Input> {
+    Processor(
+      program: program,
+      input: input,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds,
+      matchMode: .partialFromFront,
+      isTracingEnabled: enableTracing)
+  }
 }
 
 extension Processor where Input == String {

diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
@@ -31,18 +31,36 @@ struct Processor<
 > where Input.Element: Equatable { // maybe Hashable?
   typealias Element = Input.Element
 
+  /// The base collection of the subject to search.
+  ///
+  /// Taken together, `input` and `subjectBounds` define the actual subject
+  /// of the search. `input` can be a "supersequence" of the subject, while
+  /// `input[subjectBounds]` is the logical entity that is being searched.
   let input: Input
+
+  /// The bounds of the logical subject in `input`.
+  ///
+  /// Assertions and matchers receive these bounds when they are evaluated.
+  /// `subjectBounds` is equal to or a subrange of
+  /// `input.startIndex..<input.endIndex`.
+  let subjectBounds: Range<Position>
+
   let matchMode: MatchMode
   let instructions: InstructionList<Instruction>
 
   // MARK: Resettable state
 
-  // The subject bounds.
-  //
-  // FIXME: This also conflates search bounds too!
-  var bounds: Range<Position>
-
-  // The current position in the subject
+  /// The bounds within the subject for an individual search.
+  ///
+  /// `searchBounds` is equal to `subjectBounds` in some cases, but can be a
+  /// subrange when performing operations like searching for matches iteratively
+  /// or calling `str.replacing(_:with:subrange:)`.
+  var searchBounds: Range<Position>
+
+  /// The current search position while processing.
+  ///
+  /// `currentPosition` must always be in the range `subjectBounds` or equal
+  /// to `subjectBounds.upperBound`.
   var currentPosition: Position
 
   var controller: Controller
@@ -59,53 +77,51 @@ struct Processor<
 
   var failureReason: Error? = nil
 
-
   // MARK: Metrics, debugging, etc.
   var cycleCount = 0
   var isTracingEnabled: Bool
-
 }
 
 extension Processor {
   typealias Position = Input.Index
 
-  var start: Position { bounds.lowerBound }
-  var end: Position { bounds.upperBound }
+  var start: Position { subjectBounds.lowerBound }
+  var end: Position { subjectBounds.upperBound }
 }
 
 extension Processor {
   init(
     program: MEProgram<Input>,
     input: Input,
-    bounds: Range<Position>,
+    subjectBounds: Range<Position>,
+    searchBounds: Range<Position>,
     matchMode: MatchMode,
     isTracingEnabled: Bool
   ) {
     self.controller = Controller(pc: 0)
     self.instructions = program.instructions
     self.input = input
-    self.bounds = bounds
+    self.subjectBounds = subjectBounds
+    self.searchBounds = searchBounds
     self.matchMode = matchMode
     self.isTracingEnabled = isTracingEnabled
-    self.currentPosition = bounds.lowerBound
+    self.currentPosition = searchBounds.lowerBound
 
-    self.registers = Registers(program, bounds.upperBound)
+    // Initialize registers with end of search bounds
+    self.registers = Registers(program, searchBounds.upperBound)
     self.storedCaptures = Array(
        repeating: .init(), count: program.registerInfo.captures)
 
     _checkInvariants()
   }
 
-
   mutating func reset(searchBounds: Range<Position>) {
-    // FIXME: We currently conflate both subject bounds and search bounds
-    // This should just reset search bounds
-    self.bounds = searchBounds
-    self.currentPosition = self.bounds.lowerBound
+    self.searchBounds = searchBounds
+    self.currentPosition = self.searchBounds.lowerBound
 
     self.controller = Controller(pc: 0)
 
-    self.registers.reset(sentinel: bounds.upperBound)
+    self.registers.reset(sentinel: searchBounds.upperBound)
 
     self.savePoints.removeAll(keepingCapacity: true)
     self.callStack.removeAll(keepingCapacity: true)
@@ -132,7 +148,7 @@ extension Processor {
   var slice: Input.SubSequence {
     // TODO: Should we whole-scale switch to slices, or
     // does that depend on options for some anchors?
-    input[bounds]
+    input[subjectBounds]
   }
 
   // Advance in our input, without any checks or failure signalling
@@ -161,8 +177,8 @@ extension Processor {
-  /// - Precondition: `bounds.contains(index) || index == bounds.upperBound`
+  /// - Precondition: `subjectBounds.contains(index) || index == subjectBounds.upperBound`
   /// - Precondition: `index >= currentPosition`
   mutating func resume(at index: Input.Index) {
-    assert(index >= bounds.lowerBound)
-    assert(index <= bounds.upperBound)
+    assert(index >= subjectBounds.lowerBound)
+    assert(index <= subjectBounds.upperBound)
     assert(index >= currentPosition)
     currentPosition = index
   }
@@ -233,7 +249,7 @@ extension Processor {
     switch (currentPosition, matchMode) {
     // When reaching the end of the match bounds or when we are only doing a
     // prefix match, transition to accept.
-    case (bounds.upperBound, _), (_, .partialFromFront):
+    case (subjectBounds.upperBound, _), (_, .partialFromFront):
       state = .accept
 
     // When we are doing a full match but did not reach the end of the match
@@ -411,9 +427,9 @@ extension Processor {
 
     case .consumeBy:
       let reg = payload.consumer
-      guard currentPosition < bounds.upperBound,
+      guard currentPosition < subjectBounds.upperBound,
             let nextIndex = registers[reg](
-              input, currentPosition..<bounds.upperBound)
+              input, currentPosition..<subjectBounds.upperBound)
       else {
         signalFailure()
         return
@@ -425,7 +441,7 @@ extension Processor {
       let reg = payload.assertion
       let assertion = registers[reg]
       do {
-        guard try assertion(input, currentPosition, bounds) else {
+        guard try assertion(input, currentPosition, subjectBounds) else {
           signalFailure()
           return
         }
@@ -440,7 +456,7 @@ extension Processor {
       let matcher = registers[matcherReg]
       do {
         guard let (nextIdx, val) = try matcher(
-          input, currentPosition, bounds
+          input, currentPosition, subjectBounds
         ) else {
           signalFailure()
           return

diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift
@@ -22,14 +22,17 @@ struct Executor {
   @available(SwiftStdlib 5.7, *)
   func firstMatch<Output>(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    subjectBounds: Range<String.Index>,
+    searchBounds: Range<String.Index>,
     graphemeSemantic: Bool
   ) throws -> Regex<Output>.Match? {
-    var cpu = engine.makeProcessor(
-      input: input, bounds: inputRange, matchMode: .partialFromFront)
+    var cpu = engine.makeFirstMatchProcessor(
+      input: input,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds)
 
-    var low = inputRange.lowerBound
-    let high = inputRange.upperBound
+    var low = searchBounds.lowerBound
+    let high = searchBounds.upperBound
     while true {
       if let m: Regex<Output>.Match = try _match(
         input, in: low..<high, using: &cpu
@@ -49,18 +52,18 @@ struct Executor {
   @available(SwiftStdlib 5.7, *)
   func match<Output>(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     _ mode: MatchMode
   ) throws -> Regex<Output>.Match? {
     var cpu = engine.makeProcessor(
-      input: input, bounds: inputRange, matchMode: mode)
-    return try _match(input, in: inputRange, using: &cpu)
+      input: input, bounds: subjectBounds, matchMode: mode)
+    return try _match(input, in: subjectBounds, using: &cpu)
   }
 
   @available(SwiftStdlib 5.7, *)
   func _match<Output>(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     using cpu: inout Processor<String>
   ) throws -> Regex<Output>.Match? {
     guard let endIdx = cpu.consume() else {
@@ -74,7 +77,7 @@ struct Executor {
       values: cpu.storedCaptures,
       referencedCaptureOffsets: engine.program.referencedCaptureOffsets)
 
-    let range = inputRange.lowerBound..<endIdx
+    let range = subjectBounds.lowerBound..<endIdx
     let caps = engine.program.captureList.createElements(capList)
 
     let anyRegexOutput = AnyRegexOutput(input: input, elements: caps)
@@ -84,9 +87,9 @@ struct Executor {
   @available(SwiftStdlib 5.7, *)
   func dynamicMatch(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     _ mode: MatchMode
   ) throws -> Regex<AnyRegexOutput>.Match? {
-    try match(input, in: inputRange, mode)
+    try match(input, in: subjectBounds, mode)
   }
 }
diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift
@@ -126,21 +126,32 @@ extension Regex {
 
   func _match(
     _ input: String,
-    in inputRange: Range<String.Index>,
+    in subjectBounds: Range<String.Index>,
     mode: MatchMode = .wholeString
   ) throws -> Regex<Output>.Match? {
     let executor = Executor(program: regex.program.loweredProgram)
-    return try executor.match(input, in: inputRange, mode)
+    return try executor.match(input, in: subjectBounds, mode)
   }
 
   func _firstMatch(
     _ input: String,
-    in inputRange: Range<String.Index>
+    in subjectBounds: Range<String.Index>
+  ) throws -> Regex<Output>.Match? {
+    try _firstMatch(input, subjectBounds: subjectBounds, searchBounds: subjectBounds)
+  }
+
+  func _firstMatch(
+    _ input: String,
+    subjectBounds: Range<String.Index>,
+    searchBounds: Range<String.Index>
   ) throws -> Regex<Output>.Match? {
     let executor = Executor(program: regex.program.loweredProgram)
     let graphemeSemantic = regex.initialOptions.semanticLevel == .graphemeCluster
     return try executor.firstMatch(
-      input, in: inputRange, graphemeSemantic: graphemeSemantic)
+      input,
+      subjectBounds: subjectBounds,
+      searchBounds: searchBounds,
+      graphemeSemantic: graphemeSemantic)
   }
 }