Skip to content

Commit c95e862

Browse files
committed
Address review feedback
- Make `\h` and `\H` supported for now - Check character class ranges - Diagnose unquantifiable escape sequences
1 parent 466b375 commit c95e862

File tree

5 files changed

+62
-22
lines changed

5 files changed

+62
-22
lines changed

Sources/_RegexParser/Regex/AST/Atom.swift

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,6 +668,23 @@ extension AST.Atom.EscapedBuiltin {
668668
return nil
669669
}
670670
}
671+
672+
public var isQuantifiable: Bool {
673+
switch self {
674+
case .alarm, .escape, .formfeed, .newline, .carriageReturn, .tab,
675+
.singleDataUnit, .decimalDigit, .notDecimalDigit, .horizontalWhitespace,
676+
.notHorizontalWhitespace, .notNewline, .newlineSequence, .whitespace,
677+
.notWhitespace, .verticalTab, .notVerticalTab, .wordCharacter,
678+
.notWordCharacter, .backspace, .graphemeCluster, .trueAnychar:
679+
return true
680+
681+
case .wordBoundary, .notWordBoundary, .startOfSubject,
682+
.endOfSubjectBeforeNewline, .endOfSubject,
683+
.firstMatchingPositionInSubject, .resetStartOfMatch, .textSegment,
684+
.notTextSegment:
685+
return false
686+
}
687+
}
671688
}
672689

673690
extension AST.Atom {
@@ -749,6 +766,8 @@ extension AST.Atom {
749766
case .changeMatchingOptions:
750767
return false
751768
// TODO: Are callouts quantifiable?
769+
case .escaped(let esc):
770+
return esc.isQuantifiable
752771
default:
753772
return true
754773
}

Sources/_RegexParser/Regex/Parse/Diagnostics.swift

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ enum ParseError: Error, Hashable {
4545

4646
case cannotReferToWholePattern
4747

48-
case notQuantifiable
4948
case quantifierRequiresOperand(String)
5049

5150
case backtrackingDirectiveMustHaveName(String)
@@ -83,6 +82,8 @@ enum ParseError: Error, Hashable {
8382
case duplicateNamedCapture(String)
8483
case invalidCharacterClassRangeOperand
8584
case invalidQuantifierRange(Int, Int)
85+
case invalidCharacterRange(from: Character, to: Character)
86+
case notQuantifiable
8687
}
8788

8889
extension IdentifierKind {
@@ -125,8 +126,6 @@ extension ParseError: CustomStringConvertible {
125126
return "invalid escape sequence '\\\(c)'"
126127
case .cannotReferToWholePattern:
127128
return "cannot refer to whole pattern here"
128-
case .notQuantifiable:
129-
return "expression is not quantifiable"
130129
case .quantifierRequiresOperand(let q):
131130
return "quantifier '\(q)' must appear after expression"
132131
case .backtrackingDirectiveMustHaveName(let b):
@@ -191,6 +190,10 @@ extension ParseError: CustomStringConvertible {
191190
return "group named '\(str)' already exists"
192191
case let .invalidQuantifierRange(lhs, rhs):
193192
return "range lower bound '\(lhs)' must be less than or equal to upper bound '\(rhs)'"
193+
case let .invalidCharacterRange(from: lhs, to: rhs):
194+
return "character '\(lhs)' must compare less than or equal to '\(rhs)'"
195+
case .notQuantifiable:
196+
return "expression is not quantifiable"
194197
}
195198
}
196199
}

Sources/_RegexParser/Regex/Parse/Parse.swift

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,6 @@ extension Parser {
227227
if let (amt, kind, trivia) =
228228
try source.lexQuantifier(context: context) {
229229
let location = loc(_start)
230-
guard operand.isQuantifiable else {
231-
throw Source.LocatedError(ParseError.notQuantifiable, location)
232-
}
233230
result.append(.quantification(
234231
.init(amt, kind, operand, location, trivia: trivia)))
235232
} else {

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,17 +182,15 @@ extension RegexValidator {
182182
_ esc: AST.Atom.EscapedBuiltin, at loc: SourceLocation
183183
) throws {
184184
switch esc {
185-
case .resetStartOfMatch, .singleDataUnit, .horizontalWhitespace,
186-
.notHorizontalWhitespace, .verticalTab, .notVerticalTab,
185+
case .resetStartOfMatch, .singleDataUnit, .verticalTab, .notVerticalTab,
187186
// '\N' needs to be emitted using 'emitAny'.
188187
.notNewline:
189188
throw error(.unsupported("'\\\(esc.character)'"), at: loc)
190189

191190
// Character classes.
192191
case .decimalDigit, .notDecimalDigit, .whitespace, .notWhitespace,
193-
.wordCharacter, .notWordCharacter, .graphemeCluster, .trueAnychar:
194-
// TODO: What about scalar matching mode for .graphemeCluster? We
195-
// currently crash at runtime.
192+
.wordCharacter, .notWordCharacter, .graphemeCluster, .trueAnychar,
193+
.horizontalWhitespace, .notHorizontalWhitespace:
196194
break
197195

198196
case .newlineSequence:
@@ -271,18 +269,20 @@ extension RegexValidator {
271269
throw error(.invalidCharacterClassRangeOperand, at: rhs.location)
272270
}
273271

274-
guard lhs.literalCharacterValue != nil else {
272+
guard let lhsChar = lhs.literalCharacterValue else {
275273
throw error(
276274
.unsupported("character class range operand"), at: lhs.location)
277275
}
278276

279-
guard rhs.literalCharacterValue != nil else {
277+
guard let rhsChar = rhs.literalCharacterValue else {
280278
throw error(
281279
.unsupported("character class range operand"), at: rhs.location)
282280
}
283281

284-
// TODO: Validate lhs <= rhs? That may require knowledge of case
285-
// insensitivity though.
282+
guard lhsChar <= rhsChar else {
283+
throw error(
284+
.invalidCharacterRange(from: lhsChar, to: rhsChar), at: range.dashLoc)
285+
}
286286
}
287287

288288
func validateCharacterClassMember(
@@ -341,6 +341,9 @@ extension RegexValidator {
341341

342342
func validateQuantification(_ quant: AST.Quantification) throws {
343343
try validateNode(quant.child)
344+
guard quant.child.isQuantifiable else {
345+
throw error(.notQuantifiable, at: quant.child.location)
346+
}
344347
switch quant.amount.value {
345348
case .range(let lhs, let rhs):
346349
guard lhs.value <= rhs.value else {

Tests/RegexTests/ParseTests.swift

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,8 @@ extension RegexTests {
503503
parseTest("[-a-]", charClass("-", "a", "-"))
504504

505505
parseTest("[a-z]", charClass(range_m("a", "z")))
506+
parseTest("[a-a]", charClass(range_m("a", "a")))
507+
parseTest("[B-a]", charClass(range_m("B", "a")))
506508

507509
// FIXME: AST builder helpers for custom char class types
508510
parseTest("[a-d--a-c]", charClass(
@@ -2442,6 +2444,11 @@ extension RegexTests {
24422444

24432445
diagnosticTest(#"|([\d-c])?"#, .invalidCharacterClassRangeOperand)
24442446

2447+
diagnosticTest(#"[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
2448+
diagnosticTest(#"(?i)[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
2449+
diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b"))
2450+
diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}"))
2451+
24452452
// MARK: Bad escapes
24462453

24472454
diagnosticTest("\\", .expectedEscape)
@@ -2555,6 +2562,17 @@ extension RegexTests {
25552562
diagnosticTest("{1,3}", .quantifierRequiresOperand("{1,3}"))
25562563
diagnosticTest("a{3,2}", .invalidQuantifierRange(3, 2))
25572564

2565+
// These are not quantifiable.
2566+
diagnosticTest(#"\b?"#, .notQuantifiable)
2567+
diagnosticTest(#"\B*"#, .notQuantifiable)
2568+
diagnosticTest(#"\A+"#, .notQuantifiable)
2569+
diagnosticTest(#"\Z??"#, .notQuantifiable)
2570+
diagnosticTest(#"\G*?"#, .notQuantifiable)
2571+
diagnosticTest(#"\z+?"#, .notQuantifiable)
2572+
diagnosticTest(#"\K{1}"#, .unsupported(#"'\K'"#))
2573+
diagnosticTest(#"\y{2,5}"#, .notQuantifiable)
2574+
diagnosticTest(#"\Y{3,}"#, .notQuantifiable)
2575+
25582576
// MARK: Unicode scalars
25592577

25602578
diagnosticTest(#"\u{G}"#, .expectedNumber("G", kind: .hex))
@@ -2641,13 +2659,13 @@ extension RegexTests {
26412659

26422660
diagnosticTest("(*MARK)", .backtrackingDirectiveMustHaveName("MARK"))
26432661
diagnosticTest("(*:)", .expectedNonEmptyContents)
2644-
diagnosticTest("(*MARK:a)?", .notQuantifiable)
2645-
diagnosticTest("(*FAIL)+", .notQuantifiable)
2646-
diagnosticTest("(*COMMIT:b)*", .notQuantifiable)
2647-
diagnosticTest("(*PRUNE:a)??", .notQuantifiable)
2648-
diagnosticTest("(*SKIP:a)*?", .notQuantifiable)
2649-
diagnosticTest("(*F)+?", .notQuantifiable)
2650-
diagnosticTest("(*:a){2}", .notQuantifiable)
2662+
diagnosticTest("(*MARK:a)?", .unsupported("backtracking directive"))
2663+
diagnosticTest("(*FAIL)+", .unsupported("backtracking directive"))
2664+
diagnosticTest("(*COMMIT:b)*", .unsupported("backtracking directive"))
2665+
diagnosticTest("(*PRUNE:a)??", .unsupported("backtracking directive"))
2666+
diagnosticTest("(*SKIP:a)*?", .unsupported("backtracking directive"))
2667+
diagnosticTest("(*F)+?", .unsupported("backtracking directive"))
2668+
diagnosticTest("(*:a){2}", .unsupported("backtracking directive"))
26512669

26522670
// MARK: Oniguruma absent functions
26532671

0 commit comments

Comments
 (0)