Skip to content

Commit a0f2a44

Browse files
authored
Merge pull request swiftlang#409 from natecook1000/main_as_of_12_may
Sync 5.7 branch with main
2 parents 53a27f4 + 1a65e1e commit a0f2a44

35 files changed

+2418
-619
lines changed

Package.swift

Lines changed: 14 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,18 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([
77
"-Xfrontend",
88
"-define-availability",
99
"-Xfrontend",
10-
#"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999"#,
10+
"SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999",
1111
])
1212

13-
let stdlibSettings: [PackageDescription.SwiftSetting] = [
13+
/// Swift settings for building a private stdlib-like module that is to be used
14+
/// by other stdlib-like modules only.
15+
let privateStdlibSettings: [PackageDescription.SwiftSetting] = [
16+
.unsafeFlags(["-Xfrontend", "-disable-implicit-concurrency-module-import"]),
17+
.unsafeFlags(["-Xfrontend", "-disable-implicit-string-processing-module-import"]),
18+
]
19+
20+
/// Swift settings for building a user-facing stdlib-like module.
21+
let publicStdlibSettings: [PackageDescription.SwiftSetting] = [
1422
.unsafeFlags(["-enable-library-evolution"]),
1523
.unsafeFlags(["-Xfrontend", "-disable-implicit-concurrency-module-import"]),
1624
.unsafeFlags(["-Xfrontend", "-disable-implicit-string-processing-module-import"]),
@@ -43,7 +51,7 @@ let package = Package(
4351
.target(
4452
name: "_RegexParser",
4553
dependencies: [],
46-
swiftSettings: stdlibSettings),
54+
swiftSettings: privateStdlibSettings),
4755
.testTarget(
4856
name: "MatchingEngineTests",
4957
dependencies: [
@@ -55,16 +63,16 @@ let package = Package(
5563
.target(
5664
name: "_StringProcessing",
5765
dependencies: ["_RegexParser", "_CUnicode"],
58-
swiftSettings: stdlibSettings),
66+
swiftSettings: publicStdlibSettings),
5967
.target(
6068
name: "RegexBuilder",
6169
dependencies: ["_StringProcessing", "_RegexParser"],
62-
swiftSettings: stdlibSettings),
70+
swiftSettings: publicStdlibSettings),
6371
.testTarget(
6472
name: "RegexTests",
6573
dependencies: ["_StringProcessing"],
6674
swiftSettings: [
67-
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
75+
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
6876
]),
6977
.testTarget(
7078
name: "RegexBuilderTests",

Sources/PatternConverter/PatternConverter.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ struct PatternConverter: ParsableCommand {
5050
print("Converting '\(delim)\(regex)\(delim)'")
5151

5252
let ast = try _RegexParser.parse(
53-
regex,
53+
regex, .semantic,
5454
experimentalSyntax ? .experimental : .traditional)
5555

5656
// Show rendered source ranges

Sources/_RegexParser/Regex/AST/AST.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ extension AST {
2929

3030
extension AST {
3131
/// A node in the regex AST.
32-
@frozen
3332
public indirect enum Node:
3433
Hashable, _TreeNode //, _ASTPrintable ASTValue, ASTAction
3534
{
@@ -125,7 +124,9 @@ extension AST.Node {
125124
switch self {
126125
case .atom(let a):
127126
return a.isQuantifiable
128-
case .group, .conditional, .customCharacterClass, .absentFunction:
127+
case .group(let g):
128+
return g.isQuantifiable
129+
case .conditional, .customCharacterClass, .absentFunction:
129130
return true
130131
case .alternation, .concatenation, .quantification, .quote, .trivia,
131132
.empty:
@@ -247,7 +248,6 @@ extension AST {
247248
}
248249

249250
public struct Reference: Hashable {
250-
@frozen
251251
public enum Kind: Hashable {
252252
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
253253
// Oniguruma: \k<n>, \k'n'

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 70 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ extension AST {
1919
self.location = loc
2020
}
2121

22-
@frozen
2322
public enum Kind: Hashable {
2423
/// Just a character
2524
///
@@ -29,7 +28,13 @@ extension AST {
2928
/// A Unicode scalar value written as a literal
3029
///
3130
/// \u{...}, \0dd, \x{...}, ...
32-
case scalar(Unicode.Scalar)
31+
case scalar(Scalar)
32+
33+
/// A whitespace-separated sequence of Unicode scalar values which are
34+
/// implicitly splatted out.
35+
///
36+
/// `\u{A B C}` -> `\u{A}\u{B}\u{C}`
37+
case scalarSequence(ScalarSequence)
3338

3439
/// A Unicode property, category, or script, including those written using
3540
/// POSIX syntax.
@@ -84,6 +89,7 @@ extension AST.Atom {
8489
switch kind {
8590
case .char(let v): return v
8691
case .scalar(let v): return v
92+
case .scalarSequence(let v): return v
8793
case .property(let v): return v
8894
case .escaped(let v): return v
8995
case .keyboardControl(let v): return v
@@ -106,6 +112,30 @@ extension AST.Atom {
106112
}
107113
}
108114

115+
extension AST.Atom {
116+
public struct Scalar: Hashable {
117+
public var value: UnicodeScalar
118+
public var location: SourceLocation
119+
120+
public init(_ value: UnicodeScalar, _ location: SourceLocation) {
121+
self.value = value
122+
self.location = location
123+
}
124+
}
125+
126+
public struct ScalarSequence: Hashable {
127+
public var scalars: [Scalar]
128+
public var trivia: [AST.Trivia]
129+
130+
public init(_ scalars: [Scalar], trivia: [AST.Trivia]) {
131+
precondition(scalars.count > 1, "Expected multiple scalars")
132+
self.scalars = scalars
133+
self.trivia = trivia
134+
}
135+
public var scalarValues: [Unicode.Scalar] { scalars.map(\.value) }
136+
}
137+
}
138+
109139
extension AST.Atom {
110140

111141
// TODO: We might scrap this and break out a few categories so
@@ -115,7 +145,6 @@ extension AST.Atom {
115145

116146
// Characters, character types, literals, etc., derived from
117147
// an escape sequence.
118-
@frozen
119148
public enum EscapedBuiltin: Hashable {
120149
// TODO: better doc comments
121150

@@ -368,7 +397,6 @@ extension AST.Atom {
368397
}
369398

370399
extension AST.Atom.CharacterProperty {
371-
@frozen
372400
public enum Kind: Hashable {
373401
/// Matches any character, equivalent to Oniguruma's '\O'.
374402
case any
@@ -396,6 +424,9 @@ extension AST.Atom.CharacterProperty {
396424
case script(Unicode.Script)
397425
case scriptExtension(Unicode.Script)
398426

427+
/// Character name in the form `\p{name=...}`
428+
case named(String)
429+
399430
case posix(Unicode.POSIXProperty)
400431

401432
/// Some special properties implemented by PCRE and Oniguruma.
@@ -404,7 +435,6 @@ extension AST.Atom.CharacterProperty {
404435
}
405436

406437
// TODO: erm, separate out or fold into something? splat it in?
407-
@frozen
408438
public enum PCRESpecialCategory: String, Hashable {
409439
case alphanumeric = "Xan"
410440
case posixSpace = "Xps"
@@ -416,7 +446,6 @@ extension AST.Atom.CharacterProperty {
416446

417447
extension AST.Atom {
418448
/// Anchors and other built-in zero-width assertions.
419-
@frozen
420449
public enum AssertionKind: String {
421450
/// \A
422451
case startOfSubject = #"\A"#
@@ -665,6 +694,23 @@ extension AST.Atom.EscapedBuiltin {
665694
return nil
666695
}
667696
}
697+
698+
public var isQuantifiable: Bool {
699+
switch self {
700+
case .alarm, .escape, .formfeed, .newline, .carriageReturn, .tab,
701+
.singleDataUnit, .decimalDigit, .notDecimalDigit, .horizontalWhitespace,
702+
.notHorizontalWhitespace, .notNewline, .newlineSequence, .whitespace,
703+
.notWhitespace, .verticalTab, .notVerticalTab, .wordCharacter,
704+
.notWordCharacter, .backspace, .graphemeCluster, .trueAnychar:
705+
return true
706+
707+
case .wordBoundary, .notWordBoundary, .startOfSubject,
708+
.endOfSubjectBeforeNewline, .endOfSubject,
709+
.firstMatchingPositionInSubject, .resetStartOfMatch, .textSegment,
710+
.notTextSegment:
711+
return false
712+
}
713+
}
668714
}
669715

670716
extension AST.Atom {
@@ -677,7 +723,7 @@ extension AST.Atom {
677723
case .char(let c):
678724
return c
679725
case .scalar(let s):
680-
return Character(s)
726+
return Character(s.value)
681727

682728
case .escaped(let c):
683729
return c.scalarValue.map(Character.init)
@@ -693,8 +739,9 @@ extension AST.Atom {
693739
// the AST? Or defer for the matching engine?
694740
return nil
695741

696-
case .property, .any, .startOfLine, .endOfLine, .backreference, .subpattern,
697-
.callout, .backtrackingDirective, .changeMatchingOptions:
742+
case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
743+
.backreference, .subpattern, .callout, .backtrackingDirective,
744+
.changeMatchingOptions:
698745
return nil
699746
}
700747
}
@@ -716,13 +763,21 @@ extension AST.Atom {
716763
/// A string literal representation of the atom, if possible.
717764
///
718765
/// Individual characters are returned as-is, and Unicode scalars are
719-
/// presented using "\u{nnnn}" syntax.
766+
/// presented using "\u{nn nn ...}" syntax.
720767
public var literalStringValue: String? {
768+
func scalarLiteral(_ u: [UnicodeScalar]) -> String {
769+
let digits = u.map { String($0.value, radix: 16, uppercase: true) }
770+
.joined(separator: " ")
771+
return "\\u{\(digits)}"
772+
}
721773
switch kind {
722774
case .char(let c):
723775
return String(c)
724776
case .scalar(let s):
725-
return "\\u{\(String(s.value, radix: 16, uppercase: true))}"
777+
return scalarLiteral([s.value])
778+
779+
case .scalarSequence(let s):
780+
return scalarLiteral(s.scalarValues)
726781

727782
case .keyboardControl(let x):
728783
return "\\C-\(x)"
@@ -746,6 +801,10 @@ extension AST.Atom {
746801
case .changeMatchingOptions:
747802
return false
748803
// TODO: Are callouts quantifiable?
804+
case .escaped(let esc):
805+
return esc.isQuantifiable
806+
case .startOfLine, .endOfLine:
807+
return false
749808
default:
750809
return true
751810
}

Sources/_RegexParser/Regex/AST/CustomCharClass.swift

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ extension AST {
2727
self.location = sr
2828
}
2929

30-
@frozen
3130
public enum Member: Hashable {
3231
/// A nested custom character class `[[ab][cd]]`
3332
case custom(CustomCharacterClass)
@@ -59,13 +58,11 @@ extension AST {
5958
self.rhs = rhs
6059
}
6160
}
62-
@frozen
6361
public enum SetOp: String, Hashable {
6462
case subtraction = "--"
6563
case intersection = "&&"
6664
case symmetricDifference = "~~"
6765
}
68-
@frozen
6966
public enum Start: String {
7067
case normal = "["
7168
case inverted = "[^"

Sources/_RegexParser/Regex/AST/Group.swift

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,3 +136,18 @@ extension AST.Group {
136136
}
137137
}
138138
}
139+
140+
extension AST.Group {
141+
var isQuantifiable: Bool {
142+
switch kind.value {
143+
case .capture, .namedCapture, .balancedCapture, .nonCapture,
144+
.nonCaptureReset, .atomicNonCapturing, .scriptRun, .atomicScriptRun,
145+
.changeMatchingOptions:
146+
return true
147+
148+
case .lookahead, .negativeLookahead, .nonAtomicLookahead,
149+
.lookbehind, .negativeLookbehind, .nonAtomicLookbehind:
150+
return false
151+
}
152+
}
153+
}

Sources/_RegexParser/Regex/AST/Quantification.swift

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ extension AST {
3636
self.trivia = trivia
3737
}
3838

39-
@frozen
4039
public enum Amount: Hashable {
4140
case zeroOrMore // *
4241
case oneOrMore // +
@@ -47,7 +46,6 @@ extension AST {
4746
case range(Located<Int>, Located<Int>) // {n,m}
4847
}
4948

50-
@frozen
5149
public enum Kind: String, Hashable {
5250
case eager = ""
5351
case reluctant = "?"

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -26,15 +26,18 @@ extension CaptureList {
2626
public var name: String?
2727
public var type: Any.Type?
2828
public var optionalDepth: Int
29+
public var location: SourceLocation
2930

3031
public init(
3132
name: String? = nil,
3233
type: Any.Type? = nil,
33-
optionalDepth: Int
34+
optionalDepth: Int,
35+
_ location: SourceLocation
3436
) {
3537
self.name = name
3638
self.type = type
3739
self.optionalDepth = optionalDepth
40+
self.location = location
3841
}
3942
}
4043
}
@@ -61,13 +64,14 @@ extension AST.Node {
6164
case let .group(g):
6265
switch g.kind.value {
6366
case .capture:
64-
list.append(.init(optionalDepth: nesting))
67+
list.append(.init(optionalDepth: nesting, g.location))
6568

6669
case .namedCapture(let name):
67-
list.append(.init(name: name.value, optionalDepth: nesting))
70+
list.append(.init(name: name.value, optionalDepth: nesting, g.location))
6871

6972
case .balancedCapture(let b):
70-
list.append(.init(name: b.name?.value, optionalDepth: nesting))
73+
list.append(.init(name: b.name?.value, optionalDepth: nesting,
74+
g.location))
7175

7276
default: break
7377
}
@@ -124,7 +128,8 @@ extension CaptureList.Capture: Equatable {
124128
public static func == (lhs: Self, rhs: Self) -> Bool {
125129
lhs.name == rhs.name &&
126130
lhs.optionalDepth == rhs.optionalDepth &&
127-
lhs.type == rhs.type
131+
lhs.type == rhs.type &&
132+
lhs.location == rhs.location
128133
}
129134
}
130135
extension CaptureList: Equatable {}

Sources/_RegexParser/Regex/Parse/CharacterPropertyClassification.swift

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ extension Source {
1818
// This follows the rules provided by UAX44-LM3, including trying to drop an
1919
// "is" prefix, which isn't required by UTS#18 RL1.2, but is nice for
2020
// consistency with other engines and the Unicode.Scalar.Properties names.
21-
let str = str.filter { !$0.isWhitespace && $0 != "_" && $0 != "-" }
21+
let str = str.filter { !$0.isPatternWhitespace && $0 != "_" && $0 != "-" }
2222
.lowercased()
2323
if let m = match(str) {
2424
return m
@@ -428,6 +428,8 @@ extension Source {
428428
if let cat = classifyGeneralCategory(value) {
429429
return .generalCategory(cat)
430430
}
431+
case "name", "na":
432+
return .named(value)
431433
default:
432434
break
433435
}

0 commit comments

Comments
 (0)