Skip to content

Commit df5404c

Browse files
committed
std: Change escape_unicode to use new escapes
This changes the `escape_unicode` method on a `char` to use the new style of Unicode escapes in the language. Closes #19811. Closes #19879.
1 parent 59287b0 commit df5404c

File tree

4 files changed

+111
-111
lines changed

4 files changed

+111
-111
lines changed

src/libcollections/str.rs

+23-13
Original file line numberDiff line numberDiff line change
@@ -1597,17 +1597,24 @@ mod tests {
15971597

15981598
#[test]
15991599
fn test_escape_unicode() {
1600-
assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1601-
assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1602-
assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1603-
assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1600+
assert_eq!("abc".escape_unicode(),
1601+
String::from_str("\\u{61}\\u{62}\\u{63}"));
1602+
assert_eq!("a c".escape_unicode(),
1603+
String::from_str("\\u{61}\\u{20}\\u{63}"));
1604+
assert_eq!("\r\n\t".escape_unicode(),
1605+
String::from_str("\\u{d}\\u{a}\\u{9}"));
1606+
assert_eq!("'\"\\".escape_unicode(),
1607+
String::from_str("\\u{27}\\u{22}\\u{5c}"));
16041608
assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
1605-
String::from_str("\\x00\\x01\\u00fe\\u00ff"));
1606-
assert_eq!("\u{100}\u{ffff}".escape_unicode(), String::from_str("\\u0100\\uffff"));
1609+
String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
1610+
assert_eq!("\u{100}\u{ffff}".escape_unicode(),
1611+
String::from_str("\\u{100}\\u{ffff}"));
16071612
assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
1608-
String::from_str("\\U00010000\\U0010ffff"));
1609-
assert_eq!("ab\u{fb00}".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1610-
assert_eq!("\u{1d4ea}\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
1613+
String::from_str("\\u{10000}\\u{10ffff}"));
1614+
assert_eq!("ab\u{fb00}".escape_unicode(),
1615+
String::from_str("\\u{61}\\u{62}\\u{fb00}"));
1616+
assert_eq!("\u{1d4ea}\r".escape_unicode(),
1617+
String::from_str("\\u{1d4ea}\\u{d}"));
16111618
}
16121619

16131620
#[test]
@@ -1616,11 +1623,14 @@ mod tests {
16161623
assert_eq!("a c".escape_default(), String::from_str("a c"));
16171624
assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
16181625
assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1619-
assert_eq!("\u{100}\u{ffff}".escape_default(), String::from_str("\\u0100\\uffff"));
1626+
assert_eq!("\u{100}\u{ffff}".escape_default(),
1627+
String::from_str("\\u{100}\\u{ffff}"));
16201628
assert_eq!("\u{10000}\u{10ffff}".escape_default(),
1621-
String::from_str("\\U00010000\\U0010ffff"));
1622-
assert_eq!("ab\u{fb00}".escape_default(), String::from_str("ab\\ufb00"));
1623-
assert_eq!("\u{1d4ea}\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
1629+
String::from_str("\\u{10000}\\u{10ffff}"));
1630+
assert_eq!("ab\u{fb00}".escape_default(),
1631+
String::from_str("ab\\u{fb00}"));
1632+
assert_eq!("\u{1d4ea}\r".escape_default(),
1633+
String::from_str("\\u{1d4ea}\\r"));
16241634
}
16251635

16261636
#[test]

src/libcore/char.rs

+68-75
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515
#![allow(non_snake_case)]
1616
#![doc(primitive = "char")]
1717

18+
use iter::Iterator;
1819
use mem::transmute;
1920
use ops::FnMut;
20-
use option::Option;
2121
use option::Option::{None, Some};
22-
use iter::{range_step, Iterator, RangeStep};
22+
use option::Option;
2323
use slice::SliceExt;
2424

2525
// UTF-8 ranges and tags for encoding characters
@@ -156,34 +156,15 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
156156
}
157157
}
158158

159-
///
160-
/// Returns the hexadecimal Unicode escape of a `char`
161-
///
162-
/// The rules are as follows:
163-
///
164-
/// - chars in [0,0xff] get 2-digit escapes: `\\xNN`
165-
/// - chars in [0x100,0xffff] get 4-digit escapes: `\\u{NNNN}`
166-
/// - chars above 0x10000 get 8-digit escapes: `\\u{{NNN}NNNNN}`
167-
///
159+
/// Deprecated, call the escape_unicode method instead.
168160
#[deprecated = "use the Char::escape_unicode method"]
169161
pub fn escape_unicode<F>(c: char, mut f: F) where F: FnMut(char) {
170162
for char in c.escape_unicode() {
171163
f(char);
172164
}
173165
}
174166

175-
///
176-
/// Returns a 'default' ASCII and C++11-like literal escape of a `char`
177-
///
178-
/// The default is chosen with a bias toward producing literals that are
179-
/// legal in a variety of languages, including C++11 and similar C-family
180-
/// languages. The exact rules are:
181-
///
182-
/// - Tab, CR and LF are escaped as '\t', '\r' and '\n' respectively.
183-
/// - Single-quote, double-quote and backslash chars are backslash-escaped.
184-
/// - Any other chars in the range [0x20,0x7e] are not escaped.
185-
/// - Any other chars are given hex Unicode escapes; see `escape_unicode`.
186-
///
167+
/// Deprecated, call the escape_default method instead.
187168
#[deprecated = "use the Char::escape_default method"]
188169
pub fn escape_default<F>(c: char, mut f: F) where F: FnMut(char) {
189170
for c in c.escape_default() {
@@ -267,13 +248,11 @@ pub trait Char {
267248
/// Returns an iterator that yields the hexadecimal Unicode escape
268249
/// of a character, as `char`s.
269250
///
270-
/// The rules are as follows:
271-
///
272-
/// * Characters in [0,0xff] get 2-digit escapes: `\\xNN`
273-
/// * Characters in [0x100,0xffff] get 4-digit escapes: `\\u{NNNN}`.
274-
/// * Characters above 0x10000 get 8-digit escapes: `\\u{{NNN}NNNNN}`.
251+
/// All characters are escaped with Rust syntax of the form `\\u{NNNN}`
252+
/// where `NNNN` is the shortest hexadecimal representation of the code
253+
/// point.
275254
#[unstable = "pending error conventions, trait organization"]
276-
fn escape_unicode(self) -> UnicodeEscapedChars;
255+
fn escape_unicode(self) -> EscapeUnicode;
277256

278257
/// Returns an iterator that yields the 'default' ASCII and
279258
/// C++11-like literal escape of a character, as `char`s.
@@ -288,7 +267,7 @@ pub trait Char {
288267
/// * Any other chars in the range [0x20,0x7e] are not escaped.
289268
/// * Any other chars are given hex Unicode escapes; see `escape_unicode`.
290269
#[unstable = "pending error conventions, trait organization"]
291-
fn escape_default(self) -> DefaultEscapedChars;
270+
fn escape_default(self) -> EscapeDefault;
292271

293272
/// Returns the amount of bytes this character would need if encoded in
294273
/// UTF-8.
@@ -358,23 +337,23 @@ impl Char for char {
358337
fn from_u32(i: u32) -> Option<char> { from_u32(i) }
359338

360339
#[unstable = "pending error conventions, trait organization"]
361-
fn escape_unicode(self) -> UnicodeEscapedChars {
362-
UnicodeEscapedChars { c: self, state: UnicodeEscapedCharsState::Backslash }
340+
fn escape_unicode(self) -> EscapeUnicode {
341+
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash }
363342
}
364343

365344
#[unstable = "pending error conventions, trait organization"]
366-
fn escape_default(self) -> DefaultEscapedChars {
345+
fn escape_default(self) -> EscapeDefault {
367346
let init_state = match self {
368-
'\t' => DefaultEscapedCharsState::Backslash('t'),
369-
'\r' => DefaultEscapedCharsState::Backslash('r'),
370-
'\n' => DefaultEscapedCharsState::Backslash('n'),
371-
'\\' => DefaultEscapedCharsState::Backslash('\\'),
372-
'\'' => DefaultEscapedCharsState::Backslash('\''),
373-
'"' => DefaultEscapedCharsState::Backslash('"'),
374-
'\x20' ... '\x7e' => DefaultEscapedCharsState::Char(self),
375-
_ => DefaultEscapedCharsState::Unicode(self.escape_unicode())
347+
'\t' => EscapeDefaultState::Backslash('t'),
348+
'\r' => EscapeDefaultState::Backslash('r'),
349+
'\n' => EscapeDefaultState::Backslash('n'),
350+
'\\' => EscapeDefaultState::Backslash('\\'),
351+
'\'' => EscapeDefaultState::Backslash('\''),
352+
'"' => EscapeDefaultState::Backslash('"'),
353+
'\x20' ... '\x7e' => EscapeDefaultState::Char(self),
354+
_ => EscapeDefaultState::Unicode(self.escape_unicode())
376355
};
377-
DefaultEscapedChars { state: init_state }
356+
EscapeDefault { state: init_state }
378357
}
379358

380359
#[inline]
@@ -451,72 +430,86 @@ impl Char for char {
451430

452431
/// An iterator over the characters that represent a `char`, as escaped by
453432
/// Rust's unicode escaping rules.
454-
pub struct UnicodeEscapedChars {
433+
pub struct EscapeUnicode {
455434
c: char,
456-
state: UnicodeEscapedCharsState
435+
state: EscapeUnicodeState
457436
}
458437

459-
enum UnicodeEscapedCharsState {
438+
enum EscapeUnicodeState {
460439
Backslash,
461440
Type,
462-
Value(RangeStep<i32>),
441+
LeftBrace,
442+
Value(uint),
443+
RightBrace,
444+
Done,
463445
}
464446

465-
impl Iterator<char> for UnicodeEscapedChars {
447+
impl Iterator<char> for EscapeUnicode {
466448
fn next(&mut self) -> Option<char> {
467449
match self.state {
468-
UnicodeEscapedCharsState::Backslash => {
469-
self.state = UnicodeEscapedCharsState::Type;
450+
EscapeUnicodeState::Backslash => {
451+
self.state = EscapeUnicodeState::Type;
470452
Some('\\')
471453
}
472-
UnicodeEscapedCharsState::Type => {
473-
let (typechar, pad) = if self.c <= '\x7f' { ('x', 2) }
474-
else if self.c <= '\u{ffff}' { ('u', 4) }
475-
else { ('U', 8) };
476-
self.state = UnicodeEscapedCharsState::Value(range_step(4 * (pad - 1), -1, -4i32));
477-
Some(typechar)
454+
EscapeUnicodeState::Type => {
455+
self.state = EscapeUnicodeState::LeftBrace;
456+
Some('u')
478457
}
479-
UnicodeEscapedCharsState::Value(ref mut range_step) => match range_step.next() {
480-
Some(offset) => {
481-
let offset = offset as uint;
482-
let v = match ((self.c as i32) >> offset) & 0xf {
483-
i @ 0 ... 9 => '0' as i32 + i,
484-
i => 'a' as i32 + (i - 10)
485-
};
486-
Some(unsafe { transmute(v) })
458+
EscapeUnicodeState::LeftBrace => {
459+
let mut n = 0u;
460+
while (self.c as u32) >> (4 * (n + 1)) != 0 {
461+
n += 1;
487462
}
488-
None => None
463+
self.state = EscapeUnicodeState::Value(n);
464+
Some('{')
465+
}
466+
EscapeUnicodeState::Value(offset) => {
467+
let v = match ((self.c as i32) >> (offset * 4)) & 0xf {
468+
i @ 0 ... 9 => '0' as i32 + i,
469+
i => 'a' as i32 + (i - 10)
470+
};
471+
if offset == 0 {
472+
self.state = EscapeUnicodeState::RightBrace;
473+
} else {
474+
self.state = EscapeUnicodeState::Value(offset - 1);
475+
}
476+
Some(unsafe { transmute(v) })
477+
}
478+
EscapeUnicodeState::RightBrace => {
479+
self.state = EscapeUnicodeState::Done;
480+
Some('}')
489481
}
482+
EscapeUnicodeState::Done => None,
490483
}
491484
}
492485
}
493486

494487
/// An iterator over the characters that represent a `char`, escaped
495488
/// for maximum portability.
496-
pub struct DefaultEscapedChars {
497-
state: DefaultEscapedCharsState
489+
pub struct EscapeDefault {
490+
state: EscapeDefaultState
498491
}
499492

500-
enum DefaultEscapedCharsState {
493+
enum EscapeDefaultState {
501494
Backslash(char),
502495
Char(char),
503496
Done,
504-
Unicode(UnicodeEscapedChars),
497+
Unicode(EscapeUnicode),
505498
}
506499

507-
impl Iterator<char> for DefaultEscapedChars {
500+
impl Iterator<char> for EscapeDefault {
508501
fn next(&mut self) -> Option<char> {
509502
match self.state {
510-
DefaultEscapedCharsState::Backslash(c) => {
511-
self.state = DefaultEscapedCharsState::Char(c);
503+
EscapeDefaultState::Backslash(c) => {
504+
self.state = EscapeDefaultState::Char(c);
512505
Some('\\')
513506
}
514-
DefaultEscapedCharsState::Char(c) => {
515-
self.state = DefaultEscapedCharsState::Done;
507+
EscapeDefaultState::Char(c) => {
508+
self.state = EscapeDefaultState::Done;
516509
Some(c)
517510
}
518-
DefaultEscapedCharsState::Done => None,
519-
DefaultEscapedCharsState::Unicode(ref mut iter) => iter.next()
511+
EscapeDefaultState::Done => None,
512+
EscapeDefaultState::Unicode(ref mut iter) => iter.next()
520513
}
521514
}
522515
}

src/libcoretest/char.rs

+19-22
Original file line numberDiff line numberDiff line change
@@ -135,38 +135,35 @@ fn test_escape_default() {
135135
let s = string('~');
136136
assert_eq!(s, "~");
137137
let s = string('\x00');
138-
assert_eq!(s, "\\x00");
138+
assert_eq!(s, "\\u{0}");
139139
let s = string('\x1f');
140-
assert_eq!(s, "\\x1f");
140+
assert_eq!(s, "\\u{1f}");
141141
let s = string('\x7f');
142-
assert_eq!(s, "\\x7f");
143-
let s = string('\u00ff');
144-
assert_eq!(s, "\\u00ff");
145-
let s = string('\u011b');
146-
assert_eq!(s, "\\u011b");
147-
let s = string('\U0001d4b6');
148-
assert_eq!(s, "\\U0001d4b6");
142+
assert_eq!(s, "\\u{7f}");
143+
let s = string('\u{ff}');
144+
assert_eq!(s, "\\u{ff}");
145+
let s = string('\u{11b}');
146+
assert_eq!(s, "\\u{11b}");
147+
let s = string('\u{1d4b6}');
148+
assert_eq!(s, "\\u{1d4b6}");
149149
}
150150

151151
#[test]
152152
fn test_escape_unicode() {
153-
fn string(c: char) -> String {
154-
let mut result = String::new();
155-
escape_unicode(c, |c| { result.push(c); });
156-
return result;
157-
}
153+
fn string(c: char) -> String { c.escape_unicode().collect() }
154+
158155
let s = string('\x00');
159-
assert_eq!(s, "\\x00");
156+
assert_eq!(s, "\\u{0}");
160157
let s = string('\n');
161-
assert_eq!(s, "\\x0a");
158+
assert_eq!(s, "\\u{a}");
162159
let s = string(' ');
163-
assert_eq!(s, "\\x20");
160+
assert_eq!(s, "\\u{20}");
164161
let s = string('a');
165-
assert_eq!(s, "\\x61");
166-
let s = string('\u011b');
167-
assert_eq!(s, "\\u011b");
168-
let s = string('\U0001d4b6');
169-
assert_eq!(s, "\\U0001d4b6");
162+
assert_eq!(s, "\\u{61}");
163+
let s = string('\u{11b}');
164+
assert_eq!(s, "\\u{11b}");
165+
let s = string('\u{1d4b6}');
166+
assert_eq!(s, "\\u{1d4b6}");
170167
}
171168

172169
#[test]

src/test/compile-fail-fulldeps/macro-crate-cannot-read-embedded-ident.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
// aux-build:macro_crate_test.rs
1212
// ignore-stage1
1313
// ignore-android
14-
// error-pattern: unknown start of token: \x00
14+
// error-pattern: unknown start of token: \u{0}
1515

1616
// Issue #15750 and #15962 : this test is checking that the standard
1717
// parser rejects embedded idents. pnkfelix did not want to attempt

0 commit comments

Comments
 (0)