Skip to content

Commit c51f002

Browse files
committed
Only escape extended grapheme characters in the first position
1 parent 8c89e7f commit c51f002

File tree

4 files changed

+28
-15
lines changed

4 files changed

+28
-15
lines changed

src/liballoc/str.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -372,12 +372,15 @@ impl str {
372372

373373
/// Escapes each char in `s` with [`char::escape_debug`].
374374
///
375+
/// Note: only extended grapheme codepoints that begin the string will be
376+
/// escaped.
377+
///
375378
/// [`char::escape_debug`]: primitive.char.html#method.escape_debug
376379
#[unstable(feature = "str_escape",
377380
reason = "return type may change to be an iterator",
378381
issue = "27791")]
379382
pub fn escape_debug(&self) -> String {
380-
self.chars().flat_map(|c| c.escape_debug()).collect()
383+
self.chars().enumerate().flat_map(|(i, c)| c.escape_debug_ext(i == 0)).collect()
381384
}
382385

383386
/// Escapes each char in `s` with [`char::escape_default`].

src/liballoc/tests/str.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -999,7 +999,7 @@ fn test_escape_debug() {
999999
assert_eq!("\u{10000}\u{10ffff}".escape_debug(), "\u{10000}\\u{10ffff}");
10001000
assert_eq!("ab\u{200b}".escape_debug(), "ab\\u{200b}");
10011001
assert_eq!("\u{10d4ea}\r".escape_debug(), "\\u{10d4ea}\\r");
1002-
assert_eq!("\u{301}a\u{301}\u{e000}".escape_debug(), "\\u{301}a\\u{301}\\u{e000}");
1002+
assert_eq!("\u{301}a\u{301}\u{e000}".escape_debug(), "\\u{301}a\u{301}\\u{e000}");
10031003
}
10041004

10051005
#[test]

src/libcore/char/methods.rs

+22-12
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,27 @@ impl char {
187187
}
188188
}
189189

190+
/// An extended version of `escape_debug` that optionally permits escaping
191+
/// Extended Grapheme codepoints. This allows us to format characters like
192+
/// nonspacing marks better when they're at the start of a string.
193+
#[doc(hidden)]
194+
#[unstable(feature = "str_internals", issue = "0")]
195+
#[inline]
196+
pub fn escape_debug_ext(self, escape_grapheme_extended: bool) -> EscapeDebug {
197+
let init_state = match self {
198+
'\t' => EscapeDefaultState::Backslash('t'),
199+
'\r' => EscapeDefaultState::Backslash('r'),
200+
'\n' => EscapeDefaultState::Backslash('n'),
201+
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
202+
_ if escape_grapheme_extended && self.is_grapheme_extended() => {
203+
EscapeDefaultState::Unicode(self.escape_unicode())
204+
}
205+
_ if is_printable(self) => EscapeDefaultState::Char(self),
206+
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
207+
};
208+
EscapeDebug(EscapeDefault { state: init_state })
209+
}
210+
190211
/// Returns an iterator that yields the literal escape code of a character
191212
/// as `char`s.
192213
///
@@ -224,18 +245,7 @@ impl char {
224245
#[stable(feature = "char_escape_debug", since = "1.20.0")]
225246
#[inline]
226247
pub fn escape_debug(self) -> EscapeDebug {
227-
let init_state = match self {
228-
'\t' => EscapeDefaultState::Backslash('t'),
229-
'\r' => EscapeDefaultState::Backslash('r'),
230-
'\n' => EscapeDefaultState::Backslash('n'),
231-
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
232-
_ if self.is_grapheme_extended() => {
233-
EscapeDefaultState::Unicode(self.escape_unicode())
234-
}
235-
_ if is_printable(self) => EscapeDefaultState::Char(self),
236-
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
237-
};
238-
EscapeDebug(EscapeDefault { state: init_state })
248+
self.escape_debug_ext(true)
239249
}
240250

241251
/// Returns an iterator that yields the literal escape code of a character

src/libcore/tests/char.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ fn test_escape_debug() {
181181
assert_eq!(string('\u{ff}'), "\u{ff}");
182182
assert_eq!(string('\u{11b}'), "\u{11b}");
183183
assert_eq!(string('\u{1d4b6}'), "\u{1d4b6}");
184-
assert_eq!(string('\u{301}'), "'\\u{301}'"); // combining character
184+
assert_eq!(string('\u{301}'), "\\u{301}"); // combining character
185185
assert_eq!(string('\u{200b}'),"\\u{200b}"); // zero width space
186186
assert_eq!(string('\u{e000}'), "\\u{e000}"); // private use 1
187187
assert_eq!(string('\u{100000}'), "\\u{100000}"); // private use 2

0 commit comments

Comments
 (0)