Skip to content

Commit 06cd27c

Browse files
committed
Fix \o parsing crash
This crash occurred because we would walk into `expectUnicodeScalar` whenever we saw `\o`, even though only `\o{` should be parsed as a scalar. Instead, change it to be a `lex...` method (`lexUnicodeScalar`) that consumes the base character itself, and bail if we don't lex a scalar.
1 parent 635776f commit 06cd27c

File tree

2 files changed

+16
-20
lines changed

2 files changed

+16
-20
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,8 @@ extension Parser {
422422
return .scalarSequence(.init(scalars, trivia: trivia))
423423
}
424424

425-
/// Eat a scalar off the front, starting from after the
426-
/// backslash and base character (e.g. `\u` or `\x`).
425+
/// Try to eat a scalar off the front, starting from after the backslash,
426+
/// i.e. at the base character (e.g. the `u` of `\u` or the `x` of `\x`).
427427
///
428428
/// UniScalar -> 'u{' UniScalarSequence '}'
429429
/// | 'u' HexDigit{4}
@@ -433,18 +433,16 @@ extension Parser {
433433
/// | 'o{' OctalDigit{1...} '}'
434434
/// | '0' OctalDigit{0...3}
435435
///
436-
mutating func expectUnicodeScalar(
437-
escapedCharacter base: Character
438-
) -> AST.Atom.Kind {
439-
recordLoc { p in
436+
mutating func lexUnicodeScalar() -> AST.Atom.Kind? {
437+
tryEating { p in
440438

441439
func nullScalar() -> AST.Atom.Scalar {
442440
.init(UnicodeScalar(0), p.loc(p.src.currentPosition))
443441
}
444442

445443
// TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
446-
switch base {
447-
// Hex numbers.
444+
switch p.tryEat() {
445+
// Hex numbers.
448446
case "u" where p.tryEat("{"):
449447
return p.expectUnicodeScalarSequence(eating: "}")
450448

@@ -469,7 +467,7 @@ extension Parser {
469467
case "U":
470468
return .scalar(p.expectUnicodeScalar(numDigits: 8))
471469

472-
// Octal numbers.
470+
// Octal numbers.
473471
case "o" where p.tryEat("{"):
474472
let str = p.lexUntil(eating: "}")
475473
return .scalar(p.validateUnicodeScalar(str, .octal))
@@ -485,10 +483,9 @@ extension Parser {
485483
return .scalar(p.validateUnicodeScalar(digits, .octal))
486484

487485
default:
488-
p.unreachable("Unexpected scalar start")
489-
return .scalar(nullScalar())
486+
return nil
490487
}
491-
}.value
488+
}
492489
}
493490

494491
/// Try to consume a quantifier
@@ -1739,6 +1736,11 @@ extension Parser {
17391736
return ref
17401737
}
17411738

1739+
// Hexadecimal and octal unicode scalars.
1740+
if let scalar = p.lexUnicodeScalar() {
1741+
return scalar
1742+
}
1743+
17421744
guard let charLoc = p.tryEatWithLoc() else {
17431745
p.errorAtCurrentPosition(.expectedEscape)
17441746
return .invalid
@@ -1752,14 +1754,6 @@ extension Parser {
17521754
return .escaped(builtin)
17531755
}
17541756

1755-
switch char {
1756-
// Hexadecimal and octal unicode scalars.
1757-
case "u", "x", "U", "o", "0":
1758-
return p.expectUnicodeScalar(escapedCharacter: char)
1759-
default:
1760-
break
1761-
}
1762-
17631757
// We only allow unknown escape sequences for non-letter non-number ASCII,
17641758
// and non-ASCII whitespace.
17651759
// TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.

Tests/RegexTests/ParseTests.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2800,6 +2800,8 @@ extension RegexTests {
28002800

28012801
diagnosticTest("\\", .expectedEscape)
28022802

2803+
diagnosticTest(#"\o"#, .invalidEscape("o"))
2804+
28032805
// TODO: Custom diagnostic for control sequence
28042806
diagnosticTest(#"\c"#, .unexpectedEndOfInput)
28052807

0 commit comments

Comments
 (0)