Skip to content

Commit 5ee1a22

Browse files
authored
Merge pull request #117 from hamishknight/groups-n-stuff
2 parents b94c9d5 + df6be0c commit 5ee1a22

File tree

16 files changed

+609
-126
lines changed

16 files changed

+609
-126
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,20 @@ extension AST {
103103
}
104104
return self.children?.any(\.hasCapture) ?? false
105105
}
106+
107+
/// Indicates whether a quantifier such as `?`, `+` or `*` may be applied to
/// this AST node.
///
/// Atoms defer to the atom's own `isQuantifiable`; groups, conditionals and
/// custom character classes are always quantifiable; every other node kind
/// is not.
public var isQuantifiable: Bool {
  switch self {
  case .alternation, .concatenation, .quantification, .quote, .trivia,
       .empty, .groupTransform:
    return false

  case .group, .conditional, .customCharacterClass:
    return true

  case let .atom(atom):
    // Quantifiability of an atom depends on what the atom is.
    return atom.isQuantifiable
  }
}
106120
}
107121

108122
// MARK: - AST types

Sources/_MatchingEngine/Regex/AST/Atom.swift

Lines changed: 73 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,12 @@ extension AST {
6666
// References
6767
case backreference(Reference)
6868
case subpattern(Reference)
69+
70+
// (?C)
71+
case callout(Callout)
72+
73+
// (*ACCEPT), (*FAIL), ...
74+
case backtrackingDirective(BacktrackingDirective)
6975
}
7076
}
7177
}
@@ -443,6 +449,59 @@ extension AST.Atom {
443449
}
444450
}
445451

452+
extension AST.Atom {
  /// The payload of a `callout` atom, i.e. the `(?C)` construct.
  public struct Callout: Hashable {
    /// The argument supplied to a callout: either a number or a string.
    public enum Argument: Hashable {
      /// A numeric argument.
      case number(Int)

      /// A string argument.
      case string(String)
    }

    /// The callout's argument, wrapped in `AST.Located`.
    public var arg: AST.Located<Argument>

    /// Creates a callout carrying the given located argument.
    public init(_ argument: AST.Located<Argument>) {
      arg = argument
    }
  }
}
464+
465+
extension AST.Atom {
  /// The payload of a `backtrackingDirective` atom, e.g. `(*ACCEPT)` or
  /// `(*FAIL)`.
  public struct BacktrackingDirective: Hashable {
    /// The individual directives that can be represented.
    public enum Kind: Hashable {
      /// (*ACCEPT)
      case accept

      /// (*FAIL)
      case fail

      /// (*MARK:NAME)
      case mark

      /// (*COMMIT)
      case commit

      /// (*PRUNE)
      case prune

      /// (*SKIP)
      case skip

      /// (*THEN)
      case then
    }

    /// Which directive this is, wrapped in `AST.Located`.
    public var kind: AST.Located<Kind>

    /// The optional name attached to the directive, as in `(*MARK:NAME)`.
    public var name: AST.Located<String>?

    /// Creates a directive of the given kind with an optional name.
    public init(
      _ directive: AST.Located<Kind>,
      name label: AST.Located<String>?
    ) {
      kind = directive
      name = label
    }

    /// Whether a quantifier may follow this directive.
    public var isQuantifiable: Bool {
      // As per http://pcre.org/current/doc/html/pcre2pattern.html#SEC29,
      // (*ACCEPT) is the only directive that is quantifiable.
      switch kind.value {
      case .accept:
        return true
      case .fail, .mark, .commit, .prune, .skip, .then:
        return false
      }
    }
  }
}
504+
446505
extension AST.Atom {
447506
/// Retrieve the character value of the atom if it represents a literal
448507
/// character or unicode scalar, nil otherwise.
@@ -458,7 +517,8 @@ extension AST.Atom {
458517
fallthrough
459518

460519
case .property, .escaped, .any, .startOfLine, .endOfLine,
461-
.backreference, .subpattern, .namedCharacter:
520+
.backreference, .subpattern, .namedCharacter, .callout,
521+
.backtrackingDirective:
462522
return nil
463523
}
464524
}
@@ -483,10 +543,21 @@ extension AST.Atom {
483543
return "\\M-\\C-\(x)"
484544

485545
case .property, .escaped, .any, .startOfLine, .endOfLine,
486-
.backreference, .subpattern, .namedCharacter:
546+
.backreference, .subpattern, .namedCharacter, .callout,
547+
.backtrackingDirective:
487548
return nil
488549
}
489550
}
551+
552+
/// Whether a quantifier may be applied to this atom.
public var isQuantifiable: Bool {
  // TODO: Are callouts quantifiable?
  guard case .backtrackingDirective(let directive) = kind else {
    // Every atom other than a backtracking directive is treated as
    // quantifiable.
    return true
  }
  return directive.isQuantifiable
}
490561
}
491562

492563
extension AST {

Sources/_MatchingEngine/Regex/AST/Group.swift

Lines changed: 26 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,9 @@ extension AST {
3131
// (?<name>...) (?'name'...) (?P<name>...)
3232
case namedCapture(Located<String>)
3333

34+
// (?<name-priorName>...) (?'name-priorName'...)
35+
case balancedCapture(BalancedCapture)
36+
3437
// (?:...)
3538
case nonCapture
3639

@@ -79,7 +82,7 @@ extension AST {
7982
extension AST.Group.Kind {
8083
public var isCapturing: Bool {
8184
switch self {
82-
case .capture, .namedCapture: return true
85+
case .capture, .namedCapture, .balancedCapture: return true
8386
default: return false
8487
}
8588
}
@@ -103,6 +106,7 @@ extension AST.Group.Kind {
103106
public var name: String? {
  // Only named and balanced captures can carry a name; a balanced capture's
  // own name is itself optional. Every other kind yields nil.
  switch self {
  case let .balancedCapture(balanced):
    return balanced.name?.value
  case let .namedCapture(located):
    return located.value
  default:
    return nil
  }
}
@@ -121,5 +125,26 @@ extension AST.Group {
121125
default: return nil
122126
}
123127
}
128+
}
129+
130+
extension AST.Group {
  /// The payload of a `balancedCapture` group kind, written
  /// `(?<name-priorName>)` or `(?'name-priorName')`.
  public struct BalancedCapture: Hashable {
    /// The group's own name; nil when the group is unnamed.
    public var name: AST.Located<String>?

    /// Where the `-` separator appears in the group.
    public var dash: SourceLocation

    /// The name of the earlier group that this balancing group refers to.
    public var priorName: AST.Located<String>

    /// Creates a balanced capture from its optional name, the location of
    /// the `-`, and the name of the prior group.
    public init(
      name groupName: AST.Located<String>?,
      dash dashLocation: SourceLocation,
      priorName priorGroupName: AST.Located<String>
    ) {
      name = groupName
      dash = dashLocation
      priorName = priorGroupName
    }
  }
}

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,10 @@ extension AST {
4444
return .atom() + innerCaptures
4545
case .namedCapture(let name):
4646
return .atom(name: name.value) + innerCaptures
47+
case .balancedCapture(let b):
48+
return .atom(name: b.name?.value) + innerCaptures
4749
default:
50+
precondition(!group.kind.value.isCapturing)
4851
return innerCaptures
4952
}
5053
case .groupTransform(let group, let transform):

Sources/_MatchingEngine/Regex/Parse/Diagnostics.swift

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,12 @@ enum ParseError: Error, Hashable {
3838

3939
case cannotReferToWholePattern
4040

41+
case notQuantifiable
42+
43+
case backtrackingDirectiveMustHaveName(String)
44+
4145
case unknownGroupKind(String)
46+
case unknownCalloutKind(String)
4247

4348
case invalidMatchingOption(Character)
4449
case cannotRemoveMatchingOptionsAfterCaret
@@ -50,6 +55,9 @@ enum ParseError: Error, Hashable {
5055
case emptyProperty
5156

5257
case expectedGroupSpecifier
58+
case expectedGroupName
59+
case groupNameMustBeAlphaNumeric
60+
case groupNameCannotStartWithNumber
5361
case cannotRemoveTextSegmentOptions
5462
}
5563

@@ -80,12 +88,18 @@ extension ParseError: CustomStringConvertible {
8088
return "expected escape sequence"
8189
case .cannotReferToWholePattern:
8290
return "cannot refer to whole pattern here"
91+
case .notQuantifiable:
92+
return "expression is not quantifiable"
93+
case .backtrackingDirectiveMustHaveName(let b):
94+
return "backtracking directive '\(b)' must include name"
8395
case let .tooManyBranchesInConditional(i):
8496
return "expected 2 branches in conditional, have \(i)"
8597
case let .unsupportedCondition(str):
8698
return "\(str) cannot be used as condition"
8799
case let .unknownGroupKind(str):
88100
return "unknown group kind '(\(str)'"
101+
case let .unknownCalloutKind(str):
102+
return "unknown callout kind '\(str)'"
89103
case let .invalidMatchingOption(c):
90104
return "invalid matching option '\(c)'"
91105
case .cannotRemoveMatchingOptionsAfterCaret:
@@ -102,6 +116,12 @@ extension ParseError: CustomStringConvertible {
102116
return "empty property"
103117
case .expectedGroupSpecifier:
104118
return "expected group specifier"
119+
case .expectedGroupName:
120+
return "expected group name"
121+
case .groupNameMustBeAlphaNumeric:
122+
return "group name must only contain alphanumeric characters"
123+
case .groupNameCannotStartWithNumber:
124+
return "group name must not start with number"
105125
case .cannotRemoveTextSegmentOptions:
106126
return "text segment mode cannot be unset, only changed"
107127
}

0 commit comments

Comments
 (0)