Skip to content

Commit f916c44

Browse files
authored
Rollup merge of #105076 - mina86:a, r=scottmcm
Refactor core::char::EscapeDefault and co. structures

Change the core::char::{EscapeUnicode, EscapeDefault and EscapeDebug} structures from using a state machine to computing the escaped sequence upfront and, during iteration, just going through the characters. This is arguably simpler, since it’s easier to think about having a buffer and a start..end range to iterate over than to think about a state machine.

This also harmonises the implementation of the aforementioned iterators and the core::ascii::EscapeDefault struct. This is done by introducing a new helper EscapeIterInner struct, which holds the buffer and offers simple methods for iterating over the range.

As a side effect, this probably optimises the Display implementation for those types, since rather than calling write_char repeatedly, write_str is invoked once. On 64-bit platforms, it also reduces the size of some of the structs:

| Struct                     | Before | After |
|----------------------------+--------+-------|
| core::char::EscapeUnicode  | 16     | 12    |
| core::char::EscapeDefault  | 16     | 12    |
| core::char::EscapeDebug    | 16     | 16    |

My ulterior motive, and the reason why I started looking into this, is the addition of an as_str method to the iterators. With this change this will become trivial. It’s also going to be trivial to implement DoubleEndedIterator if that’s ever desired.
2 parents 1cb6357 + 76c9947 commit f916c44

File tree

5 files changed

+273
-206
lines changed

5 files changed

+273
-206
lines changed

library/core/src/ascii.rs

+41-30
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
1010
#![stable(feature = "core_ascii", since = "1.26.0")]
1111

12+
use crate::escape;
1213
use crate::fmt;
1314
use crate::iter::FusedIterator;
14-
use crate::ops::Range;
15-
use crate::str::from_utf8_unchecked;
15+
use crate::num::NonZeroUsize;
1616

1717
/// An iterator over the escaped version of a byte.
1818
///
@@ -21,10 +21,7 @@ use crate::str::from_utf8_unchecked;
2121
#[must_use = "iterators are lazy and do nothing unless consumed"]
2222
#[stable(feature = "rust1", since = "1.0.0")]
2323
#[derive(Clone)]
24-
pub struct EscapeDefault {
25-
range: Range<u8>,
26-
data: [u8; 4],
27-
}
24+
pub struct EscapeDefault(escape::EscapeIterInner<4>);
2825

2926
/// Returns an iterator that produces an escaped version of a `u8`.
3027
///
@@ -90,21 +87,9 @@ pub struct EscapeDefault {
9087
/// ```
9188
#[stable(feature = "rust1", since = "1.0.0")]
9289
pub fn escape_default(c: u8) -> EscapeDefault {
93-
let (data, len) = match c {
94-
b'\t' => ([b'\\', b't', 0, 0], 2),
95-
b'\r' => ([b'\\', b'r', 0, 0], 2),
96-
b'\n' => ([b'\\', b'n', 0, 0], 2),
97-
b'\\' => ([b'\\', b'\\', 0, 0], 2),
98-
b'\'' => ([b'\\', b'\'', 0, 0], 2),
99-
b'"' => ([b'\\', b'"', 0, 0], 2),
100-
b'\x20'..=b'\x7e' => ([c, 0, 0, 0], 1),
101-
_ => {
102-
let hex_digits: &[u8; 16] = b"0123456789abcdef";
103-
([b'\\', b'x', hex_digits[(c >> 4) as usize], hex_digits[(c & 0xf) as usize]], 4)
104-
}
105-
};
106-
107-
return EscapeDefault { range: 0..len, data };
90+
let mut data = [0; 4];
91+
let range = escape::escape_ascii_into(&mut data, c);
92+
EscapeDefault(escape::EscapeIterInner::new(data, range))
10893
}
10994

11095
#[stable(feature = "rust1", since = "1.0.0")]
@@ -113,33 +98,59 @@ impl Iterator for EscapeDefault {
11398

11499
#[inline]
115100
fn next(&mut self) -> Option<u8> {
116-
self.range.next().map(|i| self.data[i as usize])
101+
self.0.next()
117102
}
103+
104+
#[inline]
118105
fn size_hint(&self) -> (usize, Option<usize>) {
119-
self.range.size_hint()
106+
let n = self.0.len();
107+
(n, Some(n))
108+
}
109+
110+
#[inline]
111+
fn count(self) -> usize {
112+
self.0.len()
120113
}
114+
115+
#[inline]
121116
fn last(mut self) -> Option<u8> {
122-
self.next_back()
117+
self.0.next_back()
118+
}
119+
120+
#[inline]
121+
fn advance_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
122+
self.0.advance_by(n)
123123
}
124124
}
125+
125126
#[stable(feature = "rust1", since = "1.0.0")]
126127
impl DoubleEndedIterator for EscapeDefault {
128+
#[inline]
127129
fn next_back(&mut self) -> Option<u8> {
128-
self.range.next_back().map(|i| self.data[i as usize])
130+
self.0.next_back()
131+
}
132+
133+
#[inline]
134+
fn advance_back_by(&mut self, n: usize) -> Result<(), NonZeroUsize> {
135+
self.0.advance_back_by(n)
129136
}
130137
}
138+
131139
#[stable(feature = "rust1", since = "1.0.0")]
132-
impl ExactSizeIterator for EscapeDefault {}
140+
impl ExactSizeIterator for EscapeDefault {
141+
#[inline]
142+
fn len(&self) -> usize {
143+
self.0.len()
144+
}
145+
}
146+
133147
#[stable(feature = "fused", since = "1.26.0")]
134148
impl FusedIterator for EscapeDefault {}
135149

136150
#[stable(feature = "ascii_escape_display", since = "1.39.0")]
137151
impl fmt::Display for EscapeDefault {
138152
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139-
// SAFETY: ok because `escape_default` created only valid utf-8 data
140-
f.write_str(unsafe {
141-
from_utf8_unchecked(&self.data[(self.range.start as usize)..(self.range.end as usize)])
142-
})
153+
f.write_str(self.0.as_str())
143154
}
144155
}
145156

library/core/src/char/methods.rs

+21-36
Original file line numberDiff line numberDiff line change
@@ -380,20 +380,7 @@ impl char {
380380
#[stable(feature = "rust1", since = "1.0.0")]
381381
#[inline]
382382
pub fn escape_unicode(self) -> EscapeUnicode {
383-
let c = self as u32;
384-
385-
// or-ing 1 ensures that for c==0 the code computes that one
386-
// digit should be printed and (which is the same) avoids the
387-
// (31 - 32) underflow
388-
let msb = 31 - (c | 1).leading_zeros();
389-
390-
// the index of the most significant hex digit
391-
let ms_hex_digit = msb / 4;
392-
EscapeUnicode {
393-
c: self,
394-
state: EscapeUnicodeState::Backslash,
395-
hex_digit_idx: ms_hex_digit as usize,
396-
}
383+
EscapeUnicode::new(self)
397384
}
398385

399386
/// An extended version of `escape_debug` that optionally permits escaping
@@ -403,21 +390,20 @@ impl char {
403390
/// characters, and double quotes in strings.
404391
#[inline]
405392
pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> EscapeDebug {
406-
let init_state = match self {
407-
'\0' => EscapeDefaultState::Backslash('0'),
408-
'\t' => EscapeDefaultState::Backslash('t'),
409-
'\r' => EscapeDefaultState::Backslash('r'),
410-
'\n' => EscapeDefaultState::Backslash('n'),
411-
'\\' => EscapeDefaultState::Backslash(self),
412-
'"' if args.escape_double_quote => EscapeDefaultState::Backslash(self),
413-
'\'' if args.escape_single_quote => EscapeDefaultState::Backslash(self),
393+
match self {
394+
'\0' => EscapeDebug::backslash(b'0'),
395+
'\t' => EscapeDebug::backslash(b't'),
396+
'\r' => EscapeDebug::backslash(b'r'),
397+
'\n' => EscapeDebug::backslash(b'n'),
398+
'\\' => EscapeDebug::backslash(b'\\'),
399+
'"' if args.escape_double_quote => EscapeDebug::backslash(b'"'),
400+
'\'' if args.escape_single_quote => EscapeDebug::backslash(b'\''),
414401
_ if args.escape_grapheme_extended && self.is_grapheme_extended() => {
415-
EscapeDefaultState::Unicode(self.escape_unicode())
402+
EscapeDebug::from_unicode(self.escape_unicode())
416403
}
417-
_ if is_printable(self) => EscapeDefaultState::Char(self),
418-
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
419-
};
420-
EscapeDebug(EscapeDefault { state: init_state })
404+
_ if is_printable(self) => EscapeDebug::printable(self),
405+
_ => EscapeDebug::from_unicode(self.escape_unicode()),
406+
}
421407
}
422408

423409
/// Returns an iterator that yields the literal escape code of a character
@@ -515,15 +501,14 @@ impl char {
515501
#[stable(feature = "rust1", since = "1.0.0")]
516502
#[inline]
517503
pub fn escape_default(self) -> EscapeDefault {
518-
let init_state = match self {
519-
'\t' => EscapeDefaultState::Backslash('t'),
520-
'\r' => EscapeDefaultState::Backslash('r'),
521-
'\n' => EscapeDefaultState::Backslash('n'),
522-
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
523-
'\x20'..='\x7e' => EscapeDefaultState::Char(self),
524-
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
525-
};
526-
EscapeDefault { state: init_state }
504+
match self {
505+
'\t' => EscapeDefault::backslash(b't'),
506+
'\r' => EscapeDefault::backslash(b'r'),
507+
'\n' => EscapeDefault::backslash(b'n'),
508+
'\\' | '\'' | '"' => EscapeDefault::backslash(self as u8),
509+
'\x20'..='\x7e' => EscapeDefault::printable(self as u8),
510+
_ => EscapeDefault::from_unicode(self.escape_unicode()),
511+
}
527512
}
528513

529514
/// Returns the number of bytes this `char` would need if encoded in UTF-8.

0 commit comments

Comments
 (0)