Skip to content

Commit 940139e

Browse files
committed
Fix \o parsing crash
The crash occurred because we walked into `expectUnicodeScalar` whenever we saw `\o`, even though we only want to parse `\o{`. Instead, change it to a `lex…` method (`lexUnicodeScalar`) that returns `nil` when no scalar is lexed, so the caller can fall through to the regular escape handling.
1 parent 8881883 commit 940139e

File tree

2 files changed

+16
-20
lines changed

2 files changed

+16
-20
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,8 @@ extension Parser {
422422
return .scalarSequence(.init(scalars, trivia: trivia))
423423
}
424424

425-
/// Eat a scalar off the front, starting from after the
426-
/// backslash and base character (e.g. `\u` or `\x`).
425+
/// Try to eat a scalar off the front, starting from after the backslash, at
426+
/// the base character (e.g. the `u` of `\u` or the `x` of `\x`).
427427
///
428428
/// UniScalar -> 'u{' UniScalarSequence '}'
429429
/// | 'u' HexDigit{4}
@@ -433,18 +433,16 @@ extension Parser {
433433
/// | 'o{' OctalDigit{1...} '}'
434434
/// | '0' OctalDigit{0...3}
435435
///
436-
mutating func expectUnicodeScalar(
437-
escapedCharacter base: Character
438-
) -> AST.Atom.Kind {
439-
recordLoc { p in
436+
mutating func lexUnicodeScalar() -> AST.Atom.Kind? {
437+
tryEating { p in
440438

441439
func nullScalar() -> AST.Atom.Scalar {
442440
.init(UnicodeScalar(0), p.loc(p.src.currentPosition))
443441
}
444442

445443
// TODO: PCRE offers a different behavior if PCRE2_ALT_BSUX is set.
446-
switch base {
447-
// Hex numbers.
444+
switch p.tryEat() {
445+
// Hex numbers.
448446
case "u" where p.tryEat("{"):
449447
return p.expectUnicodeScalarSequence(eating: "}")
450448

@@ -469,7 +467,7 @@ extension Parser {
469467
case "U":
470468
return .scalar(p.expectUnicodeScalar(numDigits: 8))
471469

472-
// Octal numbers.
470+
// Octal numbers.
473471
case "o" where p.tryEat("{"):
474472
let str = p.lexUntil(eating: "}")
475473
return .scalar(p.validateUnicodeScalar(str, .octal))
@@ -485,10 +483,9 @@ extension Parser {
485483
return .scalar(p.validateUnicodeScalar(digits, .octal))
486484

487485
default:
488-
p.unreachable("Unexpected scalar start")
489-
return .scalar(nullScalar())
486+
return nil
490487
}
491-
}.value
488+
}
492489
}
493490

494491
/// Try to consume a quantifier
@@ -1754,6 +1751,11 @@ extension Parser {
17541751
return ref
17551752
}
17561753

1754+
// Hexadecimal and octal unicode scalars.
1755+
if let scalar = p.lexUnicodeScalar() {
1756+
return scalar
1757+
}
1758+
17571759
guard let charLoc = p.tryEatWithLoc() else {
17581760
p.errorAtCurrentPosition(.expectedEscape)
17591761
return .invalid
@@ -1767,14 +1769,6 @@ extension Parser {
17671769
return .escaped(builtin)
17681770
}
17691771

1770-
switch char {
1771-
// Hexadecimal and octal unicode scalars.
1772-
case "u", "x", "U", "o", "0":
1773-
return p.expectUnicodeScalar(escapedCharacter: char)
1774-
default:
1775-
break
1776-
}
1777-
17781772
// We only allow unknown escape sequences for non-letter non-number ASCII,
17791773
// and non-ASCII whitespace.
17801774
// TODO: Once we have fix-its, suggest a `0` prefix for octal `[\7]`.

Tests/RegexTests/ParseTests.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2806,6 +2806,8 @@ extension RegexTests {
28062806

28072807
diagnosticTest("\\", .expectedEscape)
28082808

2809+
diagnosticTest(#"\o"#, .invalidEscape("o"))
2810+
28092811
// TODO: Custom diagnostic for control sequence
28102812
diagnosticTest(#"\c"#, .unexpectedEndOfInput)
28112813

0 commit comments

Comments
 (0)