Skip to content

Commit 3cd121a

Browse files
committed
Merge branch 'main' into substantial_substrings
2 parents b331bc0 + ba6e49d commit 3cd121a

File tree

11 files changed

+232
-33
lines changed

11 files changed

+232
-33
lines changed

Package.swift

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([
1111
"-Xfrontend",
1212
"-define-availability",
1313
"-Xfrontend",
14-
"SwiftStdlib 5.8:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
14+
"SwiftStdlib 5.8:macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4",
15+
"-Xfrontend",
16+
"-define-availability",
17+
"-Xfrontend",
18+
"SwiftStdlib 5.9:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
1519
])
1620

1721
/// Swift settings for building a private stdlib-like module that is to be used
@@ -128,7 +132,8 @@ let package = Package(
128132
.product(name: "ArgumentParser", package: "swift-argument-parser"),
129133
"_RegexParser",
130134
"_StringProcessing"
131-
]),
135+
],
136+
swiftSettings: [availabilityDefinition]),
132137
.executableTarget(
133138
name: "RegexBenchmark",
134139
dependencies: [

Sources/RegexBenchmark/Suite/NotFound.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ extension BenchmarkRunner {
1313
baseName: "AnchoredNotFound",
1414
regex: "^ +a",
1515
input: input,
16-
isWhole: true)
16+
includeFirst: true)
1717
anchoredNotFound.register(&self)
1818
}
1919
}

Sources/RegexTester/RegexTester.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import _RegexParser
1414
import _StringProcessing
1515

1616
@main
17-
@available(macOS 9999, *)
17+
@available(SwiftStdlib 5.8, *)
1818
struct RegexTester: ParsableCommand {
1919
typealias MatchFunctionType = (String) throws -> Regex<AnyRegexOutput>.Match?
2020

Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift

Lines changed: 12 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,11 @@ extension RangeReplaceableCollection {
8181
// MARK: Fixed pattern algorithms
8282

8383
extension Collection where Element: Equatable {
84-
/// Returns a new collection of the same type by removing initial elements
85-
/// that satisfy the given predicate from the start.
86-
/// - Parameter predicate: A closure that takes an element of the sequence
87-
/// as its argument and returns a Boolean value indicating whether the
88-
/// element should be removed from the collection.
84+
/// Returns a new collection of the same type by removing `prefix` from the start
85+
/// of the collection.
86+
/// - Parameter prefix: The collection to remove from this collection.
8987
/// - Returns: A collection containing the elements of the collection that are
90-
/// not removed by `predicate`.
88+
/// not removed by `prefix`.
9189
@available(SwiftStdlib 5.7, *)
9290
public func trimmingPrefix<Prefix: Sequence>(
9391
_ prefix: Prefix
@@ -97,11 +95,8 @@ extension Collection where Element: Equatable {
9795
}
9896

9997
extension Collection where SubSequence == Self, Element: Equatable {
100-
/// Removes the initial elements that satisfy the given predicate from the
101-
/// start of the sequence.
102-
/// - Parameter predicate: A closure that takes an element of the sequence
103-
/// as its argument and returns a Boolean value indicating whether the
104-
/// element should be removed from the collection.
98+
/// Removes `prefix` from the start of the collection.
99+
/// - Parameter prefix: The collection to remove from this collection.
105100
@available(SwiftStdlib 5.7, *)
106101
public mutating func trimPrefix<Prefix: Sequence>(
107102
_ prefix: Prefix
@@ -111,11 +106,8 @@ extension Collection where SubSequence == Self, Element: Equatable {
111106
}
112107

113108
extension RangeReplaceableCollection where Element: Equatable {
114-
/// Removes the initial elements that satisfy the given predicate from the
115-
/// start of the sequence.
116-
/// - Parameter predicate: A closure that takes an element of the sequence
117-
/// as its argument and returns a Boolean value indicating whether the
118-
/// element should be removed from the collection.
109+
/// Removes `prefix` from the start of the collection.
110+
/// - Parameter prefix: The collection to remove from this collection.
119111
@available(SwiftStdlib 5.7, *)
120112
public mutating func trimPrefix<Prefix: Sequence>(
121113
_ prefix: Prefix
@@ -127,11 +119,11 @@ extension RangeReplaceableCollection where Element: Equatable {
127119
// MARK: Regex algorithms
128120

129121
extension BidirectionalCollection where SubSequence == Substring {
130-
/// Returns a new collection of the same type by removing `prefix` from the
131-
/// start.
132-
/// - Parameter prefix: The collection to remove from this collection.
122+
/// Returns a new collection of the same type by removing the initial elements
123+
/// that match the given regex.
124+
/// - Parameter regex: The regex to remove from this collection.
133125
/// - Returns: A collection containing the elements that do not match
134-
/// `prefix` from the start.
126+
/// `regex` from the start.
135127
@_disfavoredOverload
136128
@available(SwiftStdlib 5.7, *)
137129
public func trimmingPrefix(_ regex: some RegexComponent) -> SubSequence {

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ extension Compiler.ByteCodeGen {
4646
// The whole match (`.0` element of output) is equivalent to an implicit
4747
// capture over the entire regex.
4848
try emitNode(.capture(name: nil, reference: nil, root))
49+
builder.canOnlyMatchAtStart = root.canOnlyMatchAtStart()
4950
builder.buildAccept()
5051
return try builder.assemble()
5152
}

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ extension MEProgram {
4343
var captureList = CaptureList()
4444
var initialOptions = MatchingOptions()
4545

46+
// Starting constraint
47+
var canOnlyMatchAtStart = false
48+
4649
// Symbolic reference resolution
4750
var unresolvedReferences: [ReferenceID: [InstructionAddress]] = [:]
4851
var referencedCaptureOffsets: [ReferenceID: Int] = [:]
@@ -404,7 +407,8 @@ extension MEProgram.Builder {
404407
enableMetrics: enableMetrics,
405408
captureList: captureList,
406409
referencedCaptureOffsets: referencedCaptureOffsets,
407-
initialOptions: initialOptions)
410+
initialOptions: initialOptions,
411+
canOnlyMatchAtStart: canOnlyMatchAtStart)
408412
}
409413

410414
mutating func reset() { self = Self() }

Sources/_StringProcessing/Engine/MEProgram.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ struct MEProgram {
3838
let referencedCaptureOffsets: [ReferenceID: Int]
3939

4040
var initialOptions: MatchingOptions
41+
var canOnlyMatchAtStart: Bool
4142
}
4243

4344
extension MEProgram: CustomStringConvertible {

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -711,6 +711,105 @@ extension DSLTree.Node {
711711
}
712712
}
713713

714+
extension DSLTree.Node {
715+
/// Implementation for `canOnlyMatchAtStart`, which maintains the option
716+
/// state.
717+
///
718+
/// For a given specific node, this method can return one of three values:
719+
///
720+
/// - `true`: This node is guaranteed to match only at the start of a subject.
721+
/// - `false`: This node can match anywhere in the subject.
722+
/// - `nil`: This node is inconclusive about where it can match.
723+
///
724+
/// In particular, non-required groups and option-setting groups are
725+
/// inconclusive about where they can match.
726+
private func _canOnlyMatchAtStartImpl(_ options: inout MatchingOptions) -> Bool? {
727+
switch self {
728+
// Defining cases
729+
case .atom(.assertion(.startOfSubject)):
730+
return true
731+
case .atom(.assertion(.caretAnchor)):
732+
return !options.anchorsMatchNewlines
733+
734+
// Changing options doesn't determine `true`/`false`.
735+
case .atom(.changeMatchingOptions(let sequence)):
736+
options.apply(sequence.ast)
737+
return nil
738+
739+
// Any other atom or consuming node returns `false`.
740+
case .atom, .customCharacterClass, .quotedLiteral:
741+
return false
742+
743+
// Trivia/empty have no effect.
744+
case .trivia, .empty:
745+
return nil
746+
747+
// In an alternation, all of its children must match only at start.
748+
case .orderedChoice(let children):
749+
return children.allSatisfy { $0._canOnlyMatchAtStartImpl(&options) == true }
750+
751+
// In a concatenation, the first definitive child provides the answer.
752+
case .concatenation(let children):
753+
for child in children {
754+
if let result = child._canOnlyMatchAtStartImpl(&options) {
755+
return result
756+
}
757+
}
758+
return false
759+
760+
// Groups (and other parent nodes) defer to the child.
761+
case .nonCapturingGroup(let kind, let child):
762+
options.beginScope()
763+
defer { options.endScope() }
764+
if case .changeMatchingOptions(let sequence) = kind.ast {
765+
options.apply(sequence)
766+
}
767+
return child._canOnlyMatchAtStartImpl(&options)
768+
case .capture(_, _, let child, _):
769+
options.beginScope()
770+
defer { options.endScope() }
771+
return child._canOnlyMatchAtStartImpl(&options)
772+
case .ignoreCapturesInTypedOutput(let child),
773+
.convertedRegexLiteral(let child, _):
774+
return child._canOnlyMatchAtStartImpl(&options)
775+
776+
// A quantification that doesn't require its child to exist can still
777+
// allow a start-only match. (e.g. `/(foo)?^bar/`)
778+
case .quantification(let amount, _, let child):
779+
return amount.requiresAtLeastOne
780+
? child._canOnlyMatchAtStartImpl(&options)
781+
: nil
782+
783+
// For conditional nodes, both sides must require matching at start.
784+
case .conditional(_, let child1, let child2):
785+
return child1._canOnlyMatchAtStartImpl(&options) == true
786+
&& child2._canOnlyMatchAtStartImpl(&options) == true
787+
788+
// Extended behavior isn't known, so we return `false` for safety.
789+
case .consumer, .matcher, .characterPredicate, .absentFunction:
790+
return false
791+
}
792+
}
793+
794+
/// Returns a Boolean value indicating whether the regex with this node as
795+
/// the root can _only_ match at the start of a subject.
796+
///
797+
/// For example, these regexes can only match at the start of a subject:
798+
///
799+
/// - `/^foo/`
800+
/// - `/(^foo|^bar)/` (both sides of the alternation start with `^`)
801+
///
802+
/// These can match other places in a subject:
803+
///
804+
/// - `/(^foo)?bar/` (`^` is in an optional group)
805+
/// - `/(^foo|bar)/` (only one side of the alternation starts with `^`)
806+
/// - `/(?m)^foo/` (`^` means "the start of a line" due to `(?m)`)
807+
internal func canOnlyMatchAtStart() -> Bool {
808+
var options = MatchingOptions()
809+
return _canOnlyMatchAtStartImpl(&options) ?? false
810+
}
811+
}
812+
714813
// MARK: AST wrapper types
715814
//
716815
// These wrapper types are required because even @_spi-marked public APIs can't
@@ -818,6 +917,17 @@ extension DSLTree {
818917
public static func range(_ lower: Int, _ upper: Int) -> Self {
819918
.init(ast: .range(.init(lower, at: .fake), .init(upper, at: .fake)))
820919
}
920+
921+
internal var requiresAtLeastOne: Bool {
922+
switch ast {
923+
case .zeroOrOne, .zeroOrMore, .upToN:
924+
return false
925+
case .oneOrMore:
926+
return true
927+
case .exactly(let num), .nOrMore(let num), .range(let num, _):
928+
return num.value.map { $0 > 0 } ?? false
929+
}
930+
}
821931
}
822932

823933
@_spi(RegexBuilder)

Sources/_StringProcessing/Regex/Match.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,9 @@ extension Regex {
273273
_ input: String,
274274
in subjectBounds: Range<String.Index>
275275
) throws -> Regex<Output>.Match? {
276-
try _firstMatch(input, subjectBounds: subjectBounds, searchBounds: subjectBounds)
276+
try regex.program.loweredProgram.canOnlyMatchAtStart
277+
? _match(input, in: subjectBounds, mode: .partialFromFront)
278+
: _firstMatch(input, subjectBounds: subjectBounds, searchBounds: subjectBounds)
277279
}
278280

279281
func _firstMatch(

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
//===----------------------------------------------------------------------===//
1111

1212
import XCTest
13-
import _StringProcessing
13+
@testable import _StringProcessing
1414
import RegexBuilder
1515
import TestSupport
1616

@@ -973,6 +973,55 @@ class RegexDSLTests: XCTestCase {
973973
}
974974
}
975975

976+
func testCanOnlyMatchAtStart() throws {
977+
func expectCanOnlyMatchAtStart(
978+
_ expectation: Bool,
979+
file: StaticString = #file, line: UInt = #line,
980+
@RegexComponentBuilder _ content: () -> some RegexComponent
981+
) {
982+
let regex = content().regex
983+
XCTAssertEqual(regex.program.loweredProgram.canOnlyMatchAtStart, expectation, file: file, line: line)
984+
}
985+
986+
expectCanOnlyMatchAtStart(true) {
987+
Anchor.startOfSubject
988+
"foo"
989+
}
990+
expectCanOnlyMatchAtStart(false) {
991+
"foo"
992+
}
993+
expectCanOnlyMatchAtStart(true) {
994+
Optionally { "foo" }
995+
Anchor.startOfSubject
996+
"bar"
997+
}
998+
999+
expectCanOnlyMatchAtStart(true) {
1000+
ChoiceOf {
1001+
Regex {
1002+
Anchor.startOfSubject
1003+
"foo"
1004+
}
1005+
Regex {
1006+
Anchor.startOfSubject
1007+
"bar"
1008+
}
1009+
}
1010+
}
1011+
expectCanOnlyMatchAtStart(false) {
1012+
ChoiceOf {
1013+
Regex {
1014+
Anchor.startOfSubject
1015+
"foo"
1016+
}
1017+
Regex {
1018+
Anchor.startOfLine
1019+
"bar"
1020+
}
1021+
}
1022+
}
1023+
}
1024+
9761025
func testNestedGroups() throws {
9771026
return;
9781027

@@ -1797,8 +1846,7 @@ extension RegexDSLTests {
17971846

17981847
func testLabeledCaptures_labeledCapture() throws {
17991848
guard #available(macOS 13, *) else {
1800-
XCTSkip("Fix only exists on macOS 13")
1801-
return
1849+
throw XCTSkip("Fix only exists on macOS 13")
18021850
}
18031851
// The output type of a regex with a labeled capture is dropped.
18041852
let dslWithLabeledCapture = Regex {
@@ -1837,8 +1885,7 @@ extension RegexDSLTests {
18371885

18381886
func testLabeledCaptures_bothCapture() throws {
18391887
guard #available(macOS 13, *) else {
1840-
XCTSkip("Fix only exists on macOS 13")
1841-
return
1888+
throw XCTSkip("Fix only exists on macOS 13")
18421889
}
18431890
// Only the output type of a regex with a labeled capture is dropped,
18441891
// outputs of other regexes in the same DSL are concatenated.
@@ -1864,8 +1911,7 @@ extension RegexDSLTests {
18641911

18651912
func testLabeledCaptures_tooManyCapture() throws {
18661913
guard #available(macOS 13, *) else {
1867-
XCTSkip("Fix only exists on macOS 13")
1868-
return
1914+
throw XCTSkip("Fix only exists on macOS 13")
18691915
}
18701916
// The output type of a regex with too many captures is dropped.
18711917
// "Too many" means the left and right output types would add up to >= 10.

0 commit comments

Comments
 (0)