Commit c14c9ba

Rollup merge of #77629 - Julian-Wollersberger:recomputeRawStrError, r=varkor
Cleanup of `eat_while()` in lexer

The size of a lexer `Token` was inflated by the largest `TokenKind` variants, `LiteralKind::RawStr` and `RawByteStr`, because

* it used `usize` although `u32` is sufficient in rustc, since crates must be smaller than 4 GB,
* and it stored the 20-byte `RawStrError` enum for error reporting.

If a raw string is invalid, it now needs to be reparsed to get the `RawStrError` data, but that is a very cold code path.

Technically this breaks other tools that depend on rustc_lexer, because they are now also restricted to a maximum file size of 4 GB. But this shouldn't matter in practice, and rustc_lexer isn't stable anyway.

Can I also get a perf run?

Edit: This makes no difference in performance. The PR now only contains a small cleanup.
2 parents 1b13443 + bd49ded commit c14c9ba
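The size observation in the description is a general property of Rust enums: every value is as large as the largest variant plus the discriminant, so one payload-heavy variant taxes every token. A minimal standalone sketch with hypothetical stand-in types (not the real rustc_lexer definitions):

```rust
use std::mem::size_of;

// Hypothetical stand-in types, not the real rustc_lexer definitions.
#[derive(Clone, Copy)]
enum LeanKind {
    Ident,
    Whitespace,
}

#[derive(Clone, Copy)]
enum FatKind {
    Ident,
    Whitespace,
    // One payload-heavy variant sets the size of the whole enum, the way
    // `LiteralKind::RawStr`'s `usize` fields (and, in the original plan,
    // a stored `RawStrError`) inflated `TokenKind`.
    RawStr { n_hashes: usize, err_offset: usize },
}

fn main() {
    // Every `LeanKind` value fits in the discriminant alone; every `FatKind`
    // value pays for the largest variant, even `Ident` and `Whitespace`.
    println!("LeanKind: {} bytes", size_of::<LeanKind>()); // typically 1
    println!("FatKind:  {} bytes", size_of::<FatKind>()); // typically 24 on 64-bit
}
```

The branch name (`recomputeRawStrError`) reflects the original plan to reparse invalid raw strings for their `RawStrError`; per the edit in the description, the merged change kept only the `eat_while` cleanup.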

File tree: compiler/rustc_lexer/src — 1 file changed, +14 −20 lines
compiler/rustc_lexer/src/lib.rs

```diff
@@ -48,6 +48,7 @@ impl Token {
 }
 
 /// Enum representing common lexeme types.
+// perf note: Changing all `usize` to `u32` doesn't change performance. See #77629
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum TokenKind {
     // Multi-char tokens:
```
```diff
@@ -160,6 +161,7 @@ pub enum LiteralKind {
 /// - `r##~"abcde"##`: `InvalidStarter`
 /// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
 /// - Too many `#`s (>65535): `TooManyDelimiters`
+// perf note: It doesn't matter that this makes `Token` 36 bytes bigger. See #77629
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
 pub enum RawStrError {
     /// Non `#` characters exist between `r` and `"` eg. `r#~"..`
```
```diff
@@ -689,7 +691,12 @@ impl Cursor<'_> {
         let mut max_hashes = 0;
 
         // Count opening '#' symbols.
-        let n_start_hashes = self.eat_while(|c| c == '#');
+        let mut eaten = 0;
+        while self.first() == '#' {
+            eaten += 1;
+            self.bump();
+        }
+        let n_start_hashes = eaten;
 
         // Check that string is started.
         match self.bump() {
```
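Behaviourally, the open-coded loop above is just "count the leading `#`s" between the `r` prefix and the opening quote. A standalone sketch of that rule (hypothetical helper, not rustc_lexer API):

```rust
// Count the `#`s between the `r` prefix and the opening quote
// (hypothetical helper, not rustc_lexer API).
fn count_opening_hashes(after_r: &str) -> usize {
    after_r.chars().take_while(|&c| c == '#').count()
}

fn main() {
    assert_eq!(count_opening_hashes("##\"raw\"##"), 2); // r##"raw"##
    assert_eq!(count_opening_hashes("\"raw\""), 0); // r"raw"
}
```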
```diff
@@ -724,16 +731,11 @@ impl Cursor<'_> {
             // Note that this will not consume extra trailing `#` characters:
             // `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
             // followed by a `#` token.
-            let mut hashes_left = n_start_hashes;
-            let is_closing_hash = |c| {
-                if c == '#' && hashes_left != 0 {
-                    hashes_left -= 1;
-                    true
-                } else {
-                    false
-                }
-            };
-            let n_end_hashes = self.eat_while(is_closing_hash);
+            let mut n_end_hashes = 0;
+            while self.first() == '#' && n_end_hashes < n_start_hashes {
+                n_end_hashes += 1;
+                self.bump();
+            }
 
             if n_end_hashes == n_start_hashes {
                 return (n_start_hashes, None);
```
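The `n_end_hashes < n_start_hashes` bound is what leaves surplus trailing `#`s for the next token, as the `r###"abcde"####` example in the comment describes. A standalone sketch of just that counting rule (hypothetical helper, not rustc_lexer API):

```rust
// Count at most `n_start_hashes` trailing `#`s after the closing quote,
// mirroring the bounded loop above (hypothetical helper, not rustc_lexer API).
fn count_closing_hashes(after_quote: &str, n_start_hashes: usize) -> usize {
    after_quote.chars().take_while(|&c| c == '#').take(n_start_hashes).count()
}

fn main() {
    // r###"abcde"####: three hashes close the literal; the fourth is left
    // behind as a separate `#` token.
    assert_eq!(count_closing_hashes("####", 3), 3);
    // r###"abcde"##: fewer closing than opening hashes, so the literal is
    // not terminated here (`found: 2, expected: 3`).
    assert_eq!(count_closing_hashes("##", 3), 2);
}
```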
```diff
@@ -807,17 +809,9 @@ impl Cursor<'_> {
     }
 
     /// Eats symbols while predicate returns true or until the end of file is reached.
-    /// Returns amount of eaten symbols.
-    fn eat_while<F>(&mut self, mut predicate: F) -> usize
-    where
-        F: FnMut(char) -> bool,
-    {
-        let mut eaten: usize = 0;
+    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
         while predicate(self.first()) && !self.is_eof() {
-            eaten += 1;
             self.bump();
         }
-
-        eaten
     }
 }
```
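After this change `eat_while` only skips; the call sites that needed a count keep one themselves, as the raw-string hunks above show. A self-contained sketch of that shape, assuming a toy cursor where `first()` peeks without consuming and `bump()` advances one `char` (mirroring, but not reproducing, the real `Cursor`):

```rust
// Toy cursor mimicking the rustc_lexer shape: `first()` peeks, `bump()`
// consumes. Standalone sketch, not the real implementation.
struct Cursor<'a> {
    chars: std::str::Chars<'a>,
}

impl Cursor<'_> {
    fn first(&self) -> char {
        // EOF is modelled as '\0', as in rustc_lexer.
        self.chars.clone().next().unwrap_or('\0')
    }

    fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    fn bump(&mut self) -> Option<char> {
        self.chars.next()
    }

    // The post-cleanup signature: no counter, no return value.
    fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
        while predicate(self.first()) && !self.is_eof() {
            self.bump();
        }
    }
}

fn main() {
    let mut cursor = Cursor { chars: "###rest".chars() };

    // A caller that needs a count now keeps it itself, like the
    // raw-string loops in the hunks above.
    let mut eaten = 0;
    while cursor.first() == '#' {
        eaten += 1;
        cursor.bump();
    }
    assert_eq!(eaten, 3);

    // A caller that only needs to skip uses the slimmed-down helper.
    cursor.eat_while(|c| c.is_ascii_alphabetic());
    assert!(cursor.is_eof());
}
```

Keeping the count at the call site is what lets `eat_while` shed its return value, which is the entire cleanup that landed.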
