Skip to content

Commit 0b18557

Browse files
committed
Implement semantic diagnostics
Start emitting errors for unsupported constructs and for other semantic errors, such as duplicate group names. Once we start emitting bytecode for regexes at compile time, these checks could potentially be subsumed into the bytecode generator; for now, they are implemented as a separate pass.
1 parent c24458a commit 0b18557

File tree

9 files changed

+793
-235
lines changed

9 files changed

+793
-235
lines changed

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ enum ParseError: Error, Hashable {
1515
// TODO: I wonder if it makes sense to store the string.
1616
// This can make equality weird.
1717

18+
// MARK: Syntactic Errors
19+
1820
case numberOverflow(String)
1921
case expectedNumDigits(String, Int)
2022
case expectedNumber(String, kind: RadixKind)
@@ -55,7 +57,6 @@ enum ParseError: Error, Hashable {
5557
case cannotRemoveMatchingOptionsAfterCaret
5658

5759
case expectedCustomCharacterClassMembers
58-
case invalidCharacterClassRangeOperand
5960

6061
case emptyProperty
6162
case unknownProperty(key: String?, value: String)
@@ -73,6 +74,15 @@ enum ParseError: Error, Hashable {
7374
case cannotRemoveExtendedSyntaxInMultilineMode
7475

7576
case expectedCalloutArgument
77+
78+
// MARK: Semantic Errors
79+
80+
case unsupported(String)
81+
case deprecatedUnicode(String)
82+
case invalidReference(Int)
83+
case duplicateNamedCapture(String)
84+
case invalidCharacterClassRangeOperand
85+
case invalidQuantifierRange(Int, Int)
7686
}
7787

7888
extension IdentifierKind {
@@ -88,6 +98,7 @@ extension IdentifierKind {
8898
extension ParseError: CustomStringConvertible {
8999
var description: String {
90100
switch self {
101+
// MARK: Syntactic Errors
91102
case let .numberOverflow(s):
92103
return "number overflow: \(s)"
93104
case let .expectedNumDigits(s, i):
@@ -167,6 +178,19 @@ extension ParseError: CustomStringConvertible {
167178
return "extended syntax may not be disabled in multi-line mode"
168179
case .expectedCalloutArgument:
169180
return "expected argument to callout"
181+
182+
// MARK: Semantic Errors
183+
184+
case let .unsupported(kind):
185+
return "\(kind) is not currently supported"
186+
case let .deprecatedUnicode(kind):
187+
return "\(kind) is a deprecated Unicode property, and is not supported"
188+
case let .invalidReference(i):
189+
return "no capture numbered \(i)"
190+
case let .duplicateNamedCapture(str):
191+
return "group named '\(str)' already exists"
192+
case let .invalidQuantifierRange(lhs, rhs):
193+
return "range lower bound '\(lhs)' must be less than or equal to upper bound '\(rhs)'"
170194
}
171195
}
172196
}

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -543,11 +543,6 @@ extension Parser {
543543
// Range between atoms.
544544
if let (dashLoc, rhs) =
545545
try source.lexCustomCharClassRangeEnd(context: context) {
546-
guard atom.isValidCharacterClassRangeBound &&
547-
rhs.isValidCharacterClassRangeBound else {
548-
throw ParseError.invalidCharacterClassRangeOperand
549-
}
550-
// TODO: Validate lower <= upper?
551546
members.append(.range(.init(atom, dashLoc, rhs)))
552547
continue
553548
}
@@ -575,7 +570,14 @@ public func parse<S: StringProtocol>(
575570
{
576571
let source = Source(String(regex))
577572
var parser = Parser(source, syntax: syntax)
578-
return try parser.parse()
573+
let ast = try parser.parse()
574+
switch stage {
575+
case .syntactic:
576+
break
577+
case .semantic:
578+
try validate(ast)
579+
}
580+
return ast
579581
}
580582

581583
@available(*, deprecated, renamed: "parse(_:_:_:)")

0 commit comments

Comments
 (0)