Skip to content

Commit 535cd83

Browse files
authored
Merge pull request swiftlang#381 from hamishknight/5.7-merge-part-2
2 parents 9b870d2 + b712d58 commit 535cd83

36 files changed

+1715
-902
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
.DS_Store
22

3+
# The current toolchain is dumping files in the package root, rude
4+
*.emit-module.*
5+
36
# Xcode
47
#
58
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore

Documentation/Evolution/ProposalOverview.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
## Regex Type and Overview
55

6+
- [Second review](https://forums.swift.org/t/se-0350-second-review-regex-type-and-overview/56886)
67
- [Proposal](https://github.com/apple/swift-evolution/blob/main/proposals/0350-regex-type-overview.md), [Thread](https://forums.swift.org/t/se-0350-regex-type-and-overview/56530)
78
- [Pitch thread](https://forums.swift.org/t/pitch-regex-type-and-overview/56029)
89

Sources/PatternConverter/PatternConverter.swift

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ struct PatternConverter: ParsableCommand {
3030
@Flag(help: "Whether to show canonical regex literal")
3131
var showCanonical: Bool = false
3232

33-
@Flag(help: "Whether to show capture structure")
34-
var showCaptureStructure: Bool = false
35-
3633
@Flag(help: "Whether to skip result builder DSL")
3734
var skipDSL: Bool = false
3835

@@ -71,13 +68,6 @@ struct PatternConverter: ParsableCommand {
7168
print()
7269
}
7370

74-
if showCaptureStructure {
75-
print("Capture structure:")
76-
print()
77-
print(ast.captureStructure)
78-
print()
79-
}
80-
8171
print()
8272
if !skipDSL {
8373
let render = ast.renderAsBuilderDSL(

Sources/RegexBuilder/Anchor.swift

Lines changed: 88 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@
1212
@_implementationOnly import _RegexParser
1313
@_spi(RegexBuilder) import _StringProcessing
1414

15+
/// A regex component that matches a specific condition at a particular position
16+
/// in an input string.
17+
///
18+
/// You can use anchors to guarantee that a match only occurs at certain points
19+
/// in an input string, such as at the beginning of the string or at the end of
20+
/// a line.
1521
@available(SwiftStdlib 5.7, *)
1622
public struct Anchor {
1723
internal enum Kind {
@@ -53,14 +59,24 @@ extension Anchor: RegexComponent {
5359

5460
@available(SwiftStdlib 5.7, *)
5561
extension Anchor {
62+
/// An anchor that matches at the start of the input string.
63+
///
64+
/// This anchor is equivalent to `\A` in regex syntax.
5665
public static var startOfSubject: Anchor {
5766
Anchor(kind: .startOfSubject)
5867
}
59-
68+
69+
/// An anchor that matches at the end of the input string or at the end of
70+
/// the line immediately before the end of the string.
71+
///
72+
/// This anchor is equivalent to `\Z` in regex syntax.
6073
public static var endOfSubjectBeforeNewline: Anchor {
6174
Anchor(kind: .endOfSubjectBeforeNewline)
6275
}
63-
76+
77+
/// An anchor that matches at the end of the input string.
78+
///
79+
/// This anchor is equivalent to `\z` in regex syntax.
6480
public static var endOfSubject: Anchor {
6581
Anchor(kind: .endOfSubject)
6682
}
@@ -70,33 +86,67 @@ extension Anchor {
7086
// Anchor(kind: resetStartOfMatch)
7187
// }
7288

89+
/// An anchor that matches at the first position of a match in the input
90+
/// string.
7391
public static var firstMatchingPositionInSubject: Anchor {
7492
Anchor(kind: .firstMatchingPositionInSubject)
7593
}
7694

95+
/// An anchor that matches at a grapheme cluster boundary.
96+
///
97+
/// This anchor is equivalent to `\y` in regex syntax.
7798
public static var textSegmentBoundary: Anchor {
7899
Anchor(kind: .textSegmentBoundary)
79100
}
80101

102+
/// An anchor that matches at the start of a line, including the start of
103+
/// the input string.
104+
///
105+
/// This anchor is equivalent to `^` in regex syntax when the `m` option
106+
/// has been enabled or `anchorsMatchLineEndings(true)` has been called.
81107
public static var startOfLine: Anchor {
82108
Anchor(kind: .startOfLine)
83109
}
84110

111+
/// An anchor that matches at the end of a line, including at the end of
112+
/// the input string.
113+
///
114+
/// This anchor is equivalent to `$` in regex syntax when the `m` option
115+
/// has been enabled or `anchorsMatchLineEndings(true)` has been called.
85116
public static var endOfLine: Anchor {
86117
Anchor(kind: .endOfLine)
87118
}
88119

120+
/// An anchor that matches at a word boundary.
121+
///
122+
/// Word boundaries are identified using the Unicode default word boundary
123+
/// algorithm by default. To specify a different word boundary algorithm,
124+
/// see the `RegexComponent.wordBoundaryKind(_:)` method.
125+
///
126+
/// This anchor is equivalent to `\b` in regex syntax.
89127
public static var wordBoundary: Anchor {
90128
Anchor(kind: .wordBoundary)
91129
}
92130

131+
/// The inverse of this anchor, which matches at every position that this
132+
/// anchor does not.
133+
///
134+
/// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted
135+
/// version corresponds to `\B` and `\Y`, respectively.
93136
public var inverted: Anchor {
94137
var result = self
95138
result.isInverted.toggle()
96139
return result
97140
}
98141
}
99142

143+
/// A regex component that allows a match to continue only if its contents
144+
/// match at the given location.
145+
///
146+
/// A lookahead is a zero-length assertion that its included regex matches at
147+
/// a particular position. Lookaheads do not advance the overall matching
148+
/// position in the input string — once a lookahead succeeds, matching continues
149+
/// in the regex from the same position.
100150
@available(SwiftStdlib 5.7, *)
101151
public struct Lookahead<Output>: _BuiltinRegexComponent {
102152
public var regex: Regex<Output>
@@ -105,19 +155,48 @@ public struct Lookahead<Output>: _BuiltinRegexComponent {
105155
self.regex = regex
106156
}
107157

158+
/// Creates a lookahead from the given regex component.
108159
public init<R: RegexComponent>(
109-
_ component: R,
110-
negative: Bool = false
160+
_ component: R
111161
) where R.RegexOutput == Output {
112-
self.init(node: .nonCapturingGroup(
113-
negative ? .negativeLookahead : .lookahead, component.regex.root))
162+
self.init(node: .nonCapturingGroup(.lookahead, component.regex.root))
114163
}
164+
165+
/// Creates a lookahead from the regex generated by the given builder closure.
166+
public init<R: RegexComponent>(
167+
@RegexComponentBuilder _ component: () -> R
168+
) where R.RegexOutput == Output {
169+
self.init(node: .nonCapturingGroup(.lookahead, component().regex.root))
170+
}
171+
}
115172

173+
/// A regex component that allows a match to continue only if its contents
174+
/// do not match at the given location.
175+
///
176+
/// A negative lookahead is a zero-length assertion that its included regex
177+
/// does not match at a particular position. Lookaheads do not advance the
178+
/// overall matching position in the input string — once a lookahead succeeds,
179+
/// matching continues in the regex from the same position.
180+
@available(SwiftStdlib 5.7, *)
181+
public struct NegativeLookahead<Output>: _BuiltinRegexComponent {
182+
public var regex: Regex<Output>
183+
184+
init(_ regex: Regex<Output>) {
185+
self.regex = regex
186+
}
187+
188+
/// Creates a negative lookahead from the given regex component.
189+
public init<R: RegexComponent>(
190+
_ component: R
191+
) where R.RegexOutput == Output {
192+
self.init(node: .nonCapturingGroup(.negativeLookahead, component.regex.root))
193+
}
194+
195+
/// Creates a negative lookahead from the regex generated by the given builder
196+
/// closure.
116197
public init<R: RegexComponent>(
117-
negative: Bool = false,
118198
@RegexComponentBuilder _ component: () -> R
119199
) where R.RegexOutput == Output {
120-
self.init(node: .nonCapturingGroup(
121-
negative ? .negativeLookahead : .lookahead, component().regex.root))
200+
self.init(node: .nonCapturingGroup(.negativeLookahead, component().regex.root))
122201
}
123202
}

Sources/_RegexParser/Regex/AST/AST.swift

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,6 @@ public struct AST: Hashable {
2525
extension AST {
2626
/// Whether this AST tree contains at least one capture nested inside of it.
2727
public var hasCapture: Bool { root.hasCapture }
28-
29-
/// The capture structure of this AST tree.
30-
public var captureStructure: CaptureStructure {
31-
var constructor = CaptureStructure.Constructor(.flatten)
32-
return root._captureStructure(&constructor)
33-
}
3428
}
3529

3630
extension AST {

Sources/_RegexParser/Regex/AST/MatchingOptions.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ extension AST {
1717
case caseInsensitive // i
1818
case allowDuplicateGroupNames // J
1919
case multiline // m
20-
case noAutoCapture // n
20+
case namedCapturesOnly // n
2121
case singleLine // s
2222
case reluctantByDefault // U
2323
case extended // x
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
public struct CaptureList {
13+
public var captures: [Capture]
14+
15+
public init<S: Sequence>(_ s: S) where S.Element == Capture {
16+
captures = Array(s)
17+
}
18+
19+
public mutating func append(_ c: Capture) {
20+
captures.append(c)
21+
}
22+
}
23+
24+
extension CaptureList {
25+
public struct Capture {
26+
public var name: String?
27+
public var type: Any.Type?
28+
public var optionalDepth: Int
29+
30+
public init(
31+
name: String? = nil,
32+
type: Any.Type? = nil,
33+
optionalDepth: Int
34+
) {
35+
self.name = name
36+
self.type = type
37+
self.optionalDepth = optionalDepth
38+
}
39+
}
40+
}
41+
42+
// MARK: Generating from AST
43+
44+
extension AST.Node {
45+
public func _addCaptures(
46+
to list: inout CaptureList,
47+
optionalNesting nesting: Int
48+
) {
49+
let addOptional = nesting+1
50+
switch self {
51+
case let .alternation(a):
52+
for child in a.children {
53+
child._addCaptures(to: &list, optionalNesting: addOptional)
54+
}
55+
56+
case let .concatenation(c):
57+
for child in c.children {
58+
child._addCaptures(to: &list, optionalNesting: nesting)
59+
}
60+
61+
case let .group(g):
62+
switch g.kind.value {
63+
case .capture:
64+
list.append(.init(optionalDepth: nesting))
65+
66+
case .namedCapture(let name):
67+
list.append(.init(name: name.value, optionalDepth: nesting))
68+
69+
case .balancedCapture(let b):
70+
list.append(.init(name: b.name?.value, optionalDepth: nesting))
71+
72+
default: break
73+
}
74+
g.child._addCaptures(to: &list, optionalNesting: nesting)
75+
76+
case .conditional(let c):
77+
switch c.condition.kind {
78+
case .group(let g):
79+
AST.Node.group(g)._addCaptures(to: &list, optionalNesting: nesting)
80+
default:
81+
break
82+
}
83+
84+
c.trueBranch._addCaptures(to: &list, optionalNesting: addOptional)
85+
c.falseBranch._addCaptures(to: &list, optionalNesting: addOptional)
86+
87+
case .quantification(let q):
88+
var optNesting = nesting
89+
if q.amount.value.bounds.atLeast == 0 {
90+
optNesting += 1
91+
}
92+
q.child._addCaptures(to: &list, optionalNesting: optNesting)
93+
94+
case .absentFunction(let abs):
95+
switch abs.kind {
96+
case .expression(_, _, let child):
97+
child._addCaptures(to: &list, optionalNesting: nesting)
98+
case .clearer, .repeater, .stopper:
99+
break
100+
}
101+
102+
case .quote, .trivia, .atom, .customCharacterClass, .empty:
103+
break
104+
}
105+
}
106+
107+
public var _captureList: CaptureList {
108+
var caps = CaptureList()
109+
self._addCaptures(to: &caps, optionalNesting: 0)
110+
return caps
111+
}
112+
}
113+
114+
extension AST {
115+
/// Get the capture list for this AST
116+
public var captureList: CaptureList {
117+
root._captureList
118+
}
119+
}
120+
121+
// MARK: Convenience for testing and inspection
122+
123+
extension CaptureList.Capture: Equatable {
124+
public static func == (lhs: Self, rhs: Self) -> Bool {
125+
lhs.name == rhs.name &&
126+
lhs.optionalDepth == rhs.optionalDepth &&
127+
lhs.type == rhs.type
128+
}
129+
}
130+
extension CaptureList: Equatable {}
131+
132+
extension CaptureList.Capture: CustomStringConvertible {
133+
public var description: String {
134+
let typeStr: String
135+
if let ty = type {
136+
typeStr = "\(ty)"
137+
} else {
138+
typeStr = "Substring"
139+
}
140+
let suffix = String(repeating: "?", count: optionalDepth)
141+
return typeStr + suffix
142+
}
143+
}
144+
extension CaptureList: CustomStringConvertible {
145+
public var description: String {
146+
"(" + captures.map(\.description).joined(separator: ", ") + ")"
147+
}
148+
}
149+
150+
extension CaptureList: ExpressibleByArrayLiteral {
151+
public init(arrayLiteral elements: Capture...) {
152+
self.init(elements)
153+
}
154+
}

0 commit comments

Comments
 (0)