Skip to content

Commit 7f068dc

Browse files
authored
Merge pull request #379 from hamishknight/sema
2 parents 39c0ed5 + c95e862 commit 7f068dc

18 files changed

+928
-285
lines changed

Sources/PatternConverter/PatternConverter.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ struct PatternConverter: ParsableCommand {
5050
print("Converting '\(delim)\(regex)\(delim)'")
5151

5252
let ast = try _RegexParser.parse(
53-
regex,
53+
regex, .semantic,
5454
experimentalSyntax ? .experimental : .traditional)
5555

5656
// Show rendered source ranges

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,23 @@ extension AST.Atom.EscapedBuiltin {
668668
return nil
669669
}
670670
}
671+
672+
/// Whether this escaped builtin may be used as the operand of a quantifier.
///
/// The switch is exhaustive (no `default`), so every case is explicitly
/// classified as quantifiable or not.
public var isQuantifiable: Bool {
673+
switch self {
674+
// Escapes classified as quantifiable.
case .alarm, .escape, .formfeed, .newline, .carriageReturn, .tab,
675+
.singleDataUnit, .decimalDigit, .notDecimalDigit, .horizontalWhitespace,
676+
.notHorizontalWhitespace, .notNewline, .newlineSequence, .whitespace,
677+
.notWhitespace, .verticalTab, .notVerticalTab, .wordCharacter,
678+
.notWordCharacter, .backspace, .graphemeCluster, .trueAnychar:
679+
return true
680+
681+
// Escapes classified as not quantifiable: word boundaries, subject
// start/end positions, `resetStartOfMatch` and text segments.
case .wordBoundary, .notWordBoundary, .startOfSubject,
682+
.endOfSubjectBeforeNewline, .endOfSubject,
683+
.firstMatchingPositionInSubject, .resetStartOfMatch, .textSegment,
684+
.notTextSegment:
685+
return false
686+
}
687+
}
671688
}
672689

673690
extension AST.Atom {
@@ -749,6 +766,8 @@ extension AST.Atom {
749766
case .changeMatchingOptions:
750767
return false
751768
// TODO: Are callouts quantifiable?
769+
case .escaped(let esc):
770+
return esc.isQuantifiable
752771
default:
753772
return true
754773
}

Sources/_RegexParser/Regex/Parse/CaptureList.swift

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,18 @@ extension CaptureList {
2626
public var name: String?
2727
public var type: Any.Type?
2828
public var optionalDepth: Int
29+
public var location: SourceLocation
2930

3031
/// Creates a capture entry, storing each argument in the property of the
/// same name.
///
/// `name` and `type` default to `nil`. In the updated signature an unlabeled
/// `location` is required after `optionalDepth` and is stored in `location`.
public init(
3132
name: String? = nil,
3233
type: Any.Type? = nil,
33-
optionalDepth: Int
34+
optionalDepth: Int,
35+
_ location: SourceLocation
3436
) {
3537
self.name = name
3638
self.type = type
3739
self.optionalDepth = optionalDepth
40+
self.location = location
3841
}
3942
}
4043
}
@@ -61,13 +64,14 @@ extension AST.Node {
6164
case let .group(g):
6265
switch g.kind.value {
6366
case .capture:
64-
list.append(.init(optionalDepth: nesting))
67+
list.append(.init(optionalDepth: nesting, g.location))
6568

6669
case .namedCapture(let name):
67-
list.append(.init(name: name.value, optionalDepth: nesting))
70+
list.append(.init(name: name.value, optionalDepth: nesting, g.location))
6871

6972
case .balancedCapture(let b):
70-
list.append(.init(name: b.name?.value, optionalDepth: nesting))
73+
list.append(.init(name: b.name?.value, optionalDepth: nesting,
74+
g.location))
7175

7276
default: break
7377
}
@@ -124,7 +128,8 @@ extension CaptureList.Capture: Equatable {
124128
/// Compares two captures field by field: `name`, `optionalDepth`, `type`
/// and, in the updated version, `location` must all be equal.
public static func == (lhs: Self, rhs: Self) -> Bool {
125129
lhs.name == rhs.name &&
126130
lhs.optionalDepth == rhs.optionalDepth &&
127-
lhs.type == rhs.type
131+
lhs.type == rhs.type &&
132+
lhs.location == rhs.location
128133
}
129134
}
130135
extension CaptureList: Equatable {}

Sources/_RegexParser/Regex/Parse/CompilerInterface.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public func swiftCompilerParseRegexLiteral(
9696
_ input: String, captureBufferOut: UnsafeMutableRawBufferPointer
9797
) throws -> (regexToEmit: String, version: Int) {
9898
do {
99-
let ast = try parseWithDelimiters(input)
99+
let ast = try parseWithDelimiters(input, .semantic)
100100
// Serialize the capture structure for later type inference.
101101
assert(captureBufferOut.count >= input.utf8.count)
102102
ast.captureStructure.encode(to: captureBufferOut)

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ enum ParseError: Error, Hashable {
1515
// TODO: I wonder if it makes sense to store the string.
1616
// This can make equality weird.
1717

18+
// MARK: Syntactic Errors
19+
1820
case numberOverflow(String)
1921
case expectedNumDigits(String, Int)
2022
case expectedNumber(String, kind: RadixKind)
@@ -43,7 +45,6 @@ enum ParseError: Error, Hashable {
4345

4446
case cannotReferToWholePattern
4547

46-
case notQuantifiable
4748
case quantifierRequiresOperand(String)
4849

4950
case backtrackingDirectiveMustHaveName(String)
@@ -55,7 +56,6 @@ enum ParseError: Error, Hashable {
5556
case cannotRemoveMatchingOptionsAfterCaret
5657

5758
case expectedCustomCharacterClassMembers
58-
case invalidCharacterClassRangeOperand
5959

6060
case emptyProperty
6161
case unknownProperty(key: String?, value: String)
@@ -73,6 +73,17 @@ enum ParseError: Error, Hashable {
7373
case cannotRemoveExtendedSyntaxInMultilineMode
7474

7575
case expectedCalloutArgument
76+
77+
// MARK: Semantic Errors
78+
79+
case unsupported(String)
80+
case deprecatedUnicode(String)
81+
case invalidReference(Int)
82+
case duplicateNamedCapture(String)
83+
case invalidCharacterClassRangeOperand
84+
case invalidQuantifierRange(Int, Int)
85+
case invalidCharacterRange(from: Character, to: Character)
86+
case notQuantifiable
7687
}
7788

7889
extension IdentifierKind {
@@ -88,6 +99,7 @@ extension IdentifierKind {
8899
extension ParseError: CustomStringConvertible {
89100
var description: String {
90101
switch self {
102+
// MARK: Syntactic Errors
91103
case let .numberOverflow(s):
92104
return "number overflow: \(s)"
93105
case let .expectedNumDigits(s, i):
@@ -114,8 +126,6 @@ extension ParseError: CustomStringConvertible {
114126
return "invalid escape sequence '\\\(c)'"
115127
case .cannotReferToWholePattern:
116128
return "cannot refer to whole pattern here"
117-
case .notQuantifiable:
118-
return "expression is not quantifiable"
119129
case .quantifierRequiresOperand(let q):
120130
return "quantifier '\(q)' must appear after expression"
121131
case .backtrackingDirectiveMustHaveName(let b):
@@ -167,6 +177,23 @@ extension ParseError: CustomStringConvertible {
167177
return "extended syntax may not be disabled in multi-line mode"
168178
case .expectedCalloutArgument:
169179
return "expected argument to callout"
180+
181+
// MARK: Semantic Errors
182+
183+
case let .unsupported(kind):
184+
return "\(kind) is not currently supported"
185+
case let .deprecatedUnicode(kind):
186+
return "\(kind) is a deprecated Unicode property, and is not supported"
187+
case let .invalidReference(i):
188+
return "no capture numbered \(i)"
189+
case let .duplicateNamedCapture(str):
190+
return "group named '\(str)' already exists"
191+
case let .invalidQuantifierRange(lhs, rhs):
192+
return "range lower bound '\(lhs)' must be less than or equal to upper bound '\(rhs)'"
193+
case let .invalidCharacterRange(from: lhs, to: rhs):
194+
return "character '\(lhs)' must compare less than or equal to '\(rhs)'"
195+
case .notQuantifiable:
196+
return "expression is not quantifiable"
170197
}
171198
}
172199
}

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,6 @@ extension Parser {
227227
if let (amt, kind, trivia) =
228228
try source.lexQuantifier(context: context) {
229229
let location = loc(_start)
230-
guard operand.isQuantifiable else {
231-
throw Source.LocatedError(ParseError.notQuantifiable, location)
232-
}
233230
result.append(.quantification(
234231
.init(amt, kind, operand, location, trivia: trivia)))
235232
} else {
@@ -543,11 +540,6 @@ extension Parser {
543540
// Range between atoms.
544541
if let (dashLoc, rhs) =
545542
try source.lexCustomCharClassRangeEnd(context: context) {
546-
guard atom.isValidCharacterClassRangeBound &&
547-
rhs.isValidCharacterClassRangeBound else {
548-
throw ParseError.invalidCharacterClassRangeOperand
549-
}
550-
// TODO: Validate lower <= upper?
551543
members.append(.range(.init(atom, dashLoc, rhs)))
552544
continue
553545
}
@@ -558,13 +550,31 @@ extension Parser {
558550
}
559551
}
560552

553+
/// The level of checking that `parse` applies to an AST before returning it.
public enum ASTStage {
554+
/// The regex is parsed, and a syntactically valid AST is returned. Otherwise
555+
/// an error is thrown. This is useful for e.g. syntax coloring.
556+
case syntactic
557+
558+
/// The regex is parsed, and a syntactically and semantically valid AST is
559+
/// returned. Otherwise an error is thrown. A semantically valid AST has been
560+
/// checked for e.g. unsupported constructs and invalid backreferences.
561+
case semantic
562+
}
563+
561564
/// Parses `regex` with the given syntax options and returns the resulting AST.
///
/// In the updated version the caller also chooses an `ASTStage`: for
/// `.syntactic` the parsed AST is returned as-is, and for `.semantic` it is
/// first passed to `validate`, whose errors propagate to the caller.
///
/// - Throws: Any error thrown by `Parser.parse()` or, for `.semantic`, by
///   `validate`.
public func parse<S: StringProtocol>(
562-
_ regex: S, _ syntax: SyntaxOptions
565+
_ regex: S, _ stage: ASTStage, _ syntax: SyntaxOptions
563566
) throws -> AST where S.SubSequence == Substring
564567
{
565568
let source = Source(String(regex))
566569
var parser = Parser(source, syntax: syntax)
567-
return try parser.parse()
570+
let ast = try parser.parse()
571+
switch stage {
572+
case .syntactic:
573+
// Syntactic stage: no further checks beyond parsing.
break
574+
case .semantic:
575+
// Semantic stage: validate the parsed AST before handing it back.
try validate(ast)
576+
}
577+
return ast
568578
}
569579

570580
/// Retrieve the default set of syntax options that a delimiter and literal
@@ -591,11 +601,12 @@ fileprivate func defaultSyntaxOptions(
591601
/// Parses a given regex string with delimiters, inferring the syntax options
592602
/// from the delimiters used.
593603
public func parseWithDelimiters<S: StringProtocol>(
594-
_ regex: S
604+
_ regex: S, _ stage: ASTStage
595605
) throws -> AST where S.SubSequence == Substring {
596606
let (contents, delim) = droppingRegexDelimiters(String(regex))
597607
do {
598-
return try parse(contents, defaultSyntaxOptions(delim, contents: contents))
608+
let syntax = defaultSyntaxOptions(delim, contents: contents)
609+
return try parse(contents, stage, syntax)
599610
} catch let error as LocatedErrorProtocol {
600611
// Convert the range in 'contents' to the range in 'regex'.
601612
let delimCount = delim.opening.count

0 commit comments

Comments
 (0)