Skip to content

Commit 62fd560

Browse files
authored
Merge pull request #442 from hamishknight/main-merge
2 parents f0a4590 + 6d1d146 commit 62fd560

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+3290
-1107
lines changed

Package.swift

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,18 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([
77
"-Xfrontend",
88
"-define-availability",
99
"-Xfrontend",
10-
#"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999"#,
10+
"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
1111
])
1212

13-
let stdlibSettings: [PackageDescription.SwiftSetting] = [
13+
/// Swift settings for building a private stdlib-like module that is to be used
14+
/// by other stdlib-like modules only.
15+
let privateStdlibSettings: [PackageDescription.SwiftSetting] = [
16+
.unsafeFlags(["-Xfrontend", "-disable-implicit-concurrency-module-import"]),
17+
.unsafeFlags(["-Xfrontend", "-disable-implicit-string-processing-module-import"]),
18+
]
19+
20+
/// Swift settings for building a user-facing stdlib-like module.
21+
let publicStdlibSettings: [PackageDescription.SwiftSetting] = [
1422
.unsafeFlags(["-enable-library-evolution"]),
1523
.unsafeFlags(["-Xfrontend", "-disable-implicit-concurrency-module-import"]),
1624
.unsafeFlags(["-Xfrontend", "-disable-implicit-string-processing-module-import"]),
@@ -43,7 +51,7 @@ let package = Package(
4351
.target(
4452
name: "_RegexParser",
4553
dependencies: [],
46-
swiftSettings: stdlibSettings),
54+
swiftSettings: privateStdlibSettings),
4755
.testTarget(
4856
name: "MatchingEngineTests",
4957
dependencies: [
@@ -55,11 +63,11 @@ let package = Package(
5563
.target(
5664
name: "_StringProcessing",
5765
dependencies: ["_RegexParser", "_CUnicode"],
58-
swiftSettings: stdlibSettings),
66+
swiftSettings: publicStdlibSettings),
5967
.target(
6068
name: "RegexBuilder",
6169
dependencies: ["_StringProcessing", "_RegexParser"],
62-
swiftSettings: stdlibSettings),
70+
swiftSettings: publicStdlibSettings),
6371
.testTarget(
6472
name: "RegexTests",
6573
dependencies: ["_StringProcessing"],

Sources/PatternConverter/PatternConverter.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ struct PatternConverter: ParsableCommand {
7070

7171
print()
7272
if !skipDSL {
73-
let render = ast.renderAsBuilderDSL(
73+
let render = renderAsBuilderDSL(
74+
ast: ast,
7475
maxTopDownLevels: topDownConversionLimit,
7576
minBottomUpLevels: bottomUpConversionLimit
7677
)

Sources/RegexBuilder/Algorithms.swift

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//
1010
//===----------------------------------------------------------------------===//
1111

12-
import _StringProcessing
12+
@_spi(RegexBuilder) import _StringProcessing
1313

1414
// FIXME(rdar://92459215): We should be using 'some RegexComponent' instead of
1515
// <R: RegexComponent> for the methods below that don't impose any additional
@@ -313,3 +313,31 @@ where Self: BidirectionalCollection, SubSequence == Substring {
313313
try replace(content(), maxReplacements: maxReplacements, with: replacement)
314314
}
315315
}
316+
317+
// String split overload breakers
318+
319+
extension StringProtocol where SubSequence == Substring {
320+
@available(SwiftStdlib 5.7, *)
321+
public func split(
322+
separator: String,
323+
maxSplits: Int = .max,
324+
omittingEmptySubsequences: Bool = true
325+
) -> [Substring] {
326+
return _split(
327+
separator: separator,
328+
maxSplits: maxSplits,
329+
omittingEmptySubsequences: omittingEmptySubsequences)
330+
}
331+
332+
@available(SwiftStdlib 5.7, *)
333+
public func split(
334+
separator: Substring,
335+
maxSplits: Int = .max,
336+
omittingEmptySubsequences: Bool = true
337+
) -> [Substring] {
338+
return _split(
339+
separator: separator,
340+
maxSplits: maxSplits,
341+
omittingEmptySubsequences: omittingEmptySubsequences)
342+
}
343+
}

Sources/RegexBuilder/DSL.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,19 @@ extension DSLTree.Node {
127127
}
128128
}
129129

130+
/// A regex component that matches exactly one occurrence of its underlying
131+
/// component.
132+
@available(SwiftStdlib 5.7, *)
133+
public struct One<Output>: RegexComponent {
134+
public var regex: Regex<Output>
135+
136+
public init<Component: RegexComponent>(
137+
_ component: Component
138+
) where Component.RegexOutput == Output {
139+
self.regex = component.regex
140+
}
141+
}
142+
130143
@available(SwiftStdlib 5.7, *)
131144
public struct OneOrMore<Output>: _BuiltinRegexComponent {
132145
public var regex: Regex<Output>

Sources/_RegexParser/Regex/AST/AST.swift

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ extension AST {
2929

3030
extension AST {
3131
/// A node in the regex AST.
32-
@frozen
3332
public indirect enum Node:
3433
Hashable, _TreeNode //, _ASTPrintable ASTValue, ASTAction
3534
{
@@ -53,6 +52,9 @@ extension AST {
5352
/// Comments, non-semantic whitespace, etc
5453
case trivia(Trivia)
5554

55+
/// Interpolation `<{...}>`, currently reserved for future use.
56+
case interpolation(Interpolation)
57+
5658
case atom(Atom)
5759

5860
case customCharacterClass(CustomCharacterClass)
@@ -78,6 +80,7 @@ extension AST.Node {
7880
case let .quantification(v): return v
7981
case let .quote(v): return v
8082
case let .trivia(v): return v
83+
case let .interpolation(v): return v
8184
case let .atom(v): return v
8285
case let .customCharacterClass(v): return v
8386
case let .empty(v): return v
@@ -130,7 +133,7 @@ extension AST.Node {
130133
case .conditional, .customCharacterClass, .absentFunction:
131134
return true
132135
case .alternation, .concatenation, .quantification, .quote, .trivia,
133-
.empty:
136+
.empty, .interpolation:
134137
return false
135138
}
136139
}
@@ -194,6 +197,16 @@ extension AST {
194197
}
195198
}
196199

200+
public struct Interpolation: Hashable, _ASTNode {
201+
public let contents: String
202+
public let location: SourceLocation
203+
204+
public init(_ contents: String, _ location: SourceLocation) {
205+
self.contents = contents
206+
self.location = location
207+
}
208+
}
209+
197210
public struct Empty: Hashable, _ASTNode {
198211
public let location: SourceLocation
199212

@@ -249,7 +262,6 @@ extension AST {
249262
}
250263

251264
public struct Reference: Hashable {
252-
@frozen
253265
public enum Kind: Hashable {
254266
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
255267
// Oniguruma: \k<n>, \k'n'

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ extension AST {
1919
self.location = loc
2020
}
2121

22-
@frozen
2322
public enum Kind: Hashable {
2423
/// Just a character
2524
///
@@ -146,7 +145,6 @@ extension AST.Atom {
146145

147146
// Characters, character types, literals, etc., derived from
148147
// an escape sequence.
149-
@frozen
150148
public enum EscapedBuiltin: Hashable {
151149
// TODO: better doc comments
152150

@@ -399,7 +397,6 @@ extension AST.Atom {
399397
}
400398

401399
extension AST.Atom.CharacterProperty {
402-
@frozen
403400
public enum Kind: Hashable {
404401
/// Matches any character, equivalent to Oniguruma's '\O'.
405402
case any
@@ -430,27 +427,73 @@ extension AST.Atom.CharacterProperty {
430427
/// Character name in the form `\p{name=...}`
431428
case named(String)
432429

430+
/// Numeric type.
431+
case numericType(Unicode.NumericType)
432+
433+
/// Numeric value.
434+
case numericValue(Double)
435+
436+
/// Case mapping.
437+
case mapping(MapKind, String)
438+
439+
/// Canonical Combining Class.
440+
case ccc(Unicode.CanonicalCombiningClass)
441+
442+
/// Character age, as per UnicodeScalar.Properties.age.
443+
case age(major: Int, minor: Int)
444+
445+
/// A block property.
446+
case block(Unicode.Block)
447+
433448
case posix(Unicode.POSIXProperty)
434449

435450
/// Some special properties implemented by PCRE and Oniguruma.
436451
case pcreSpecial(PCRESpecialCategory)
437-
case onigurumaSpecial(OnigurumaSpecialProperty)
452+
453+
/// Some special properties implemented by Java.
454+
case javaSpecial(JavaSpecial)
455+
456+
public enum MapKind: Hashable {
457+
case lowercase
458+
case uppercase
459+
case titlecase
460+
}
438461
}
439462

440-
// TODO: erm, separate out or fold into something? splat it in?
441-
@frozen
442463
public enum PCRESpecialCategory: String, Hashable {
443464
case alphanumeric = "Xan"
444465
case posixSpace = "Xps"
445466
case perlSpace = "Xsp"
446467
case universallyNamed = "Xuc"
447468
case perlWord = "Xwd"
448469
}
470+
471+
/// Special Java properties that correspond to methods on
472+
/// `java.lang.Character`, with the `java` prefix replaced by `is`.
473+
public enum JavaSpecial: String, Hashable, CaseIterable {
474+
case alphabetic = "javaAlphabetic"
475+
case defined = "javaDefined"
476+
case digit = "javaDigit"
477+
case identifierIgnorable = "javaIdentifierIgnorable"
478+
case ideographic = "javaIdeographic"
479+
case isoControl = "javaISOControl"
480+
case javaIdentifierPart = "javaJavaIdentifierPart" // not a typo, that's actually the name
481+
case javaIdentifierStart = "javaJavaIdentifierStart" // not a typo, that's actually the name
482+
case javaLetter = "javaLetter"
483+
case javaLetterOrDigit = "javaLetterOrDigit"
484+
case lowerCase = "javaLowerCase"
485+
case mirrored = "javaMirrored"
486+
case spaceChar = "javaSpaceChar"
487+
case titleCase = "javaTitleCase"
488+
case unicodeIdentifierPart = "javaUnicodeIdentifierPart"
489+
case unicodeIdentifierStart = "javaUnicodeIdentifierStart"
490+
case upperCase = "javaUpperCase"
491+
case whitespace = "javaWhitespace"
492+
}
449493
}
450494

451495
extension AST.Atom {
452496
/// Anchors and other built-in zero-width assertions.
453-
@frozen
454497
public enum AssertionKind: String {
455498
/// \A
456499
case startOfSubject = #"\A"#
@@ -824,7 +867,7 @@ extension AST.Node {
824867
case .alternation, .concatenation, .group,
825868
.conditional, .quantification, .quote,
826869
.trivia, .customCharacterClass, .empty,
827-
.absentFunction:
870+
.absentFunction, .interpolation:
828871
return nil
829872
}
830873
}

Sources/_RegexParser/Regex/AST/CustomCharClass.swift

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ extension AST {
2727
self.location = sr
2828
}
2929

30-
@frozen
3130
public enum Member: Hashable {
3231
/// A nested custom character class `[[ab][cd]]`
3332
case custom(CustomCharacterClass)
@@ -52,20 +51,23 @@ extension AST {
5251
public var lhs: Atom
5352
public var dashLoc: SourceLocation
5453
public var rhs: Atom
54+
public var trivia: [AST.Trivia]
5555

56-
public init(_ lhs: Atom, _ dashLoc: SourceLocation, _ rhs: Atom) {
56+
public init(
57+
_ lhs: Atom, _ dashLoc: SourceLocation, _ rhs: Atom,
58+
trivia: [AST.Trivia]
59+
) {
5760
self.lhs = lhs
5861
self.dashLoc = dashLoc
5962
self.rhs = rhs
63+
self.trivia = trivia
6064
}
6165
}
62-
@frozen
6366
public enum SetOp: String, Hashable {
6467
case subtraction = "--"
6568
case intersection = "&&"
6669
case symmetricDifference = "~~"
6770
}
68-
@frozen
6971
public enum Start: String {
7072
case normal = "["
7173
case inverted = "[^"
@@ -98,6 +100,11 @@ extension CustomCC.Member {
98100
return false
99101
}
100102

103+
public var asTrivia: AST.Trivia? {
104+
guard case .trivia(let t) = self else { return nil }
105+
return t
106+
}
107+
101108
public var isSemantic: Bool {
102109
!isTrivia
103110
}

Sources/_RegexParser/Regex/AST/Quantification.swift

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ extension AST {
3636
self.trivia = trivia
3737
}
3838

39-
@frozen
4039
public enum Amount: Hashable {
4140
case zeroOrMore // *
4241
case oneOrMore // +
@@ -47,7 +46,6 @@ extension AST {
4746
case range(Located<Int>, Located<Int>) // {n,m}
4847
}
4948

50-
@frozen
5149
public enum Kind: String, Hashable {
5250
case eager = ""
5351
case reluctant = "?"

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,21 @@ extension CaptureList {
4242
}
4343
}
4444

45+
extension CaptureList {
46+
/// Retrieve the capture index of a given named capture, or `nil` if there is
47+
/// no such capture.
48+
public func indexOfCapture(named name: String) -> Int? {
49+
// Named references are guaranteed to be unique for literal ASTs by Sema.
50+
// The DSL tree does not use named references.
51+
captures.indices.first(where: { captures[$0].name == name })
52+
}
53+
54+
/// Whether the capture list has a given named capture.
55+
public func hasCapture(named name: String) -> Bool {
56+
indexOfCapture(named: name) != nil
57+
}
58+
}
59+
4560
// MARK: Generating from AST
4661

4762
extension AST.Node {
@@ -103,7 +118,7 @@ extension AST.Node {
103118
break
104119
}
105120

106-
case .quote, .trivia, .atom, .customCharacterClass, .empty:
121+
case .quote, .trivia, .atom, .customCharacterClass, .empty, .interpolation:
107122
break
108123
}
109124
}

0 commit comments

Comments
 (0)