Skip to content

Commit d1556b9

Browse files
committed
syntax: copy unstable char::escape_{default,unicode} code into libsyntax
This avoids using unsafe behavior.
1 parent 6b29a7d commit d1556b9

File tree

4 files changed

+123
-8
lines changed

4 files changed

+123
-8
lines changed

src/libsyntax/parse/lexer/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use diagnostic::SpanHandler;
1515
use ext::tt::transcribe::tt_next_token;
1616
use parse::token::str_to_ident;
1717
use parse::token;
18-
use str::char_at;
18+
use str::{char_at, escape_default};
1919

2020
use std::borrow::Cow;
2121
use std::char;
@@ -205,7 +205,7 @@ impl<'a> StringReader<'a> {
205205
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> ! {
206206
let mut m = m.to_string();
207207
m.push_str(": ");
208-
for c in c.escape_default() { m.push(c) }
208+
for c in escape_default(c) { m.push(c) }
209209
self.fatal_span_(from_pos, to_pos, &m[..]);
210210
}
211211

@@ -214,7 +214,7 @@ impl<'a> StringReader<'a> {
214214
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
215215
let mut m = m.to_string();
216216
m.push_str(": ");
217-
for c in c.escape_default() { m.push(c) }
217+
for c in escape_default(c) { m.push(c) }
218218
self.err_span_(from_pos, to_pos, &m[..]);
219219
}
220220

src/libsyntax/parse/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use diagnostic::{SpanHandler, mk_span_handler, default_handler, Auto, FatalError
1616
use parse::attr::ParserAttr;
1717
use parse::parser::Parser;
1818
use ptr::P;
19-
use str::char_at;
19+
use str::{char_at, escape_default_string};
2020

2121
use std::cell::{Cell, RefCell};
2222
use std::fs::File;
@@ -430,7 +430,7 @@ pub fn char_lit(lit: &str) -> (char, isize) {
430430
/// Parse a string representing a string literal into its final form. Does
431431
/// unescaping.
432432
pub fn str_lit(lit: &str) -> String {
433-
debug!("parse_str_lit: given {}", lit.escape_default());
433+
debug!("parse_str_lit: given {}", escape_default_string(lit));
434434
let mut res = String::with_capacity(lit.len());
435435

436436
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator
@@ -505,7 +505,7 @@ pub fn str_lit(lit: &str) -> String {
505505
/// Parse a string representing a raw string literal into its final form. The
506506
/// only operation this does is convert embedded CRLF into a single LF.
507507
pub fn raw_str_lit(lit: &str) -> String {
508-
debug!("raw_str_lit: given {}", lit.escape_default());
508+
debug!("raw_str_lit: given {}", escape_default_string(lit));
509509
let mut res = String::with_capacity(lit.len());
510510

511511
// FIXME #8372: This could be a for-loop if it didn't borrow the iterator

src/libsyntax/print/pprust.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use print::pp::{Breaks, eof};
2727
use print::pp::Breaks::{Consistent, Inconsistent};
2828
use ptr::P;
2929
use std_inject;
30+
use str::{escape_default, escape_default_string};
3031

3132
use std::ascii;
3233
use std::io::{self, Write, Read};
@@ -2801,7 +2802,7 @@ impl<'a> State<'a> {
28012802
}
28022803
ast::LitChar(ch) => {
28032804
let mut res = String::from("'");
2804-
res.extend(ch.escape_default());
2805+
res.extend(escape_default(ch));
28052806
res.push('\'');
28062807
word(&mut self.s, &res[..])
28072808
}
@@ -2934,7 +2935,7 @@ impl<'a> State<'a> {
29342935
style: ast::StrStyle) -> io::Result<()> {
29352936
let st = match style {
29362937
ast::CookedStr => {
2937-
(format!("\"{}\"", st.escape_default()))
2938+
(format!("\"{}\"", escape_default_string(st)))
29382939
}
29392940
ast::RawStr(n) => {
29402941
(format!("r{delim}\"{string}\"{delim}",

src/libsyntax/str.rs

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,120 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11+
use std::mem::transmute;
12+
1113
/// Returns the character that begins at byte offset `byte` of `s`.
///
/// # Panics
///
/// Panics if `s[byte..]` cannot be sliced (offset past the end or not on a
/// character boundary), or if the remaining slice is empty.
pub fn char_at(s: &str, byte: usize) -> char {
    let tail = &s[byte..];
    let mut rest = tail.chars();
    rest.next().unwrap()
}
16+
17+
// FIXME: This was copied from core/char.rs because it is currently unstable.
18+
pub fn escape_unicode(ch: char) -> EscapeUnicode {
19+
EscapeUnicode { c: ch, state: EscapeUnicodeState::Backslash }
20+
}
21+
22+
// FIXME: This was copied from core/char.rs because it is currently unstable.
23+
pub fn escape_default(ch: char) -> EscapeDefault {
24+
let init_state = match ch {
25+
'\t' => EscapeDefaultState::Backslash('t'),
26+
'\r' => EscapeDefaultState::Backslash('r'),
27+
'\n' => EscapeDefaultState::Backslash('n'),
28+
'\\' => EscapeDefaultState::Backslash('\\'),
29+
'\'' => EscapeDefaultState::Backslash('\''),
30+
'"' => EscapeDefaultState::Backslash('"'),
31+
'\x20' ... '\x7e' => EscapeDefaultState::Char(ch),
32+
_ => EscapeDefaultState::Unicode(escape_unicode(ch))
33+
};
34+
EscapeDefault { state: init_state }
35+
}
36+
37+
pub fn escape_default_string(s: &str) -> String {
38+
s.chars().flat_map(escape_default).collect()
39+
}
40+
41+
// FIXME: This was copied from core/char.rs because it is currently unstable.
42+
/// Iterator that yields the characters of the `\u{NNNN}` escape of a `char`,
/// where `NNNN` is the code point in lowercase hexadecimal without leading
/// zeros.
#[derive(Clone, Debug)]
pub struct EscapeUnicode {
    c: char,
    state: EscapeUnicodeState
}

/// The piece of the escape sequence that `EscapeUnicode` emits next.
#[derive(Clone, Debug)]
enum EscapeUnicodeState {
    Backslash,
    Type,
    LeftBrace,
    /// Index (counted from the least significant end) of the hex digit to
    /// emit next.
    Value(usize),
    RightBrace,
    Done,
}

impl Iterator for EscapeUnicode {
    type Item = char;

    /// Emits the next character of the escape and advances the state machine;
    /// returns `None` once the closing brace has been produced.
    fn next(&mut self) -> Option<char> {
        match self.state {
            EscapeUnicodeState::Backslash => {
                self.state = EscapeUnicodeState::Type;
                Some('\\')
            }
            EscapeUnicodeState::Type => {
                self.state = EscapeUnicodeState::LeftBrace;
                Some('u')
            }
            EscapeUnicodeState::LeftBrace => {
                // Find the index of the most significant non-zero hex digit
                // (0 for code points below 0x10) so no leading zeros are
                // emitted.
                let code = self.c as u32;
                let mut top = 0;
                while code >> (4 * (top + 1)) != 0 {
                    top += 1;
                }
                self.state = EscapeUnicodeState::Value(top);
                Some('{')
            }
            EscapeUnicodeState::Value(offset) => {
                let nibble = ((self.c as u32) >> (offset * 4)) & 0xf;
                self.state = if offset == 0 {
                    EscapeUnicodeState::RightBrace
                } else {
                    EscapeUnicodeState::Value(offset - 1)
                };
                // `nibble` is masked to 0..=15, so it is always a valid
                // base-16 digit; `from_digit` yields lowercase letters for
                // 10..=15. This replaces an `unsafe` transmute from an
                // integer to `char`.
                Some(::std::char::from_digit(nibble, 16)
                         .expect("a 4-bit value is always a valid hex digit"))
            }
            EscapeUnicodeState::RightBrace => {
                self.state = EscapeUnicodeState::Done;
                Some('}')
            }
            EscapeUnicodeState::Done => None,
        }
    }
}
97+
98+
// FIXME: This was copied from core/char.rs because it is currently unstable.
99+
pub struct EscapeDefault {
100+
state: EscapeDefaultState
101+
}
102+
103+
enum EscapeDefaultState {
104+
Backslash(char),
105+
Char(char),
106+
Done,
107+
Unicode(EscapeUnicode),
108+
}
109+
110+
impl Iterator for EscapeDefault {
111+
type Item = char;
112+
113+
fn next(&mut self) -> Option<char> {
114+
match self.state {
115+
EscapeDefaultState::Backslash(c) => {
116+
self.state = EscapeDefaultState::Char(c);
117+
Some('\\')
118+
}
119+
EscapeDefaultState::Char(c) => {
120+
self.state = EscapeDefaultState::Done;
121+
Some(c)
122+
}
123+
EscapeDefaultState::Done => None,
124+
EscapeDefaultState::Unicode(ref mut iter) => iter.next()
125+
}
126+
}
127+
}

0 commit comments

Comments
 (0)