Skip to content

Commit 34c5253

Browse files
committed
Move the rest of core::unicode::char to core::unicode
1 parent 1800d69 commit 34c5253

File tree

3 files changed

+266
-274
lines changed

3 files changed

+266
-274
lines changed

src/libcore/unicode/char.rs renamed to src/libcore/char/methods.rs

+128-123
Original file line numberDiff line numberDiff line change
@@ -8,150 +8,155 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
//! A character type.
12-
//!
13-
//! The `char` type represents a single character. More specifically, since
14-
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
15-
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
16-
//! point]'.
17-
//!
18-
//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
19-
//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
20-
//!
21-
//! This module exists for technical reasons, the primary documentation for
22-
//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html)
23-
//! itself.
24-
//!
25-
//! This module is the home of the iterator implementations for the iterators
26-
//! implemented on `char`, as well as some useful constants and conversion
27-
//! functions that convert various types to `char`.
11+
//! impl char {}
2812
29-
#![stable(feature = "rust1", since = "1.0.0")]
30-
31-
use char::*;
32-
use char::CharExt as C;
33-
use iter::FusedIterator;
34-
use fmt::{self, Write};
13+
use slice;
14+
use str::from_utf8_unchecked_mut;
15+
use super::*;
16+
use super::CharExt as C;
17+
use super::printable::is_printable;
3518
use unicode::tables::{conversions, derived_property, general_category, property};
3619

37-
/// Returns an iterator that yields the lowercase equivalent of a `char`.
38-
///
39-
/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See
40-
/// its documentation for more.
41-
///
42-
/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase
43-
/// [`char`]: ../../std/primitive.char.html
44-
#[stable(feature = "rust1", since = "1.0.0")]
45-
#[derive(Debug, Clone)]
46-
pub struct ToLowercase(CaseMappingIter);
20+
#[stable(feature = "core", since = "1.6.0")]
21+
impl CharExt for char {
22+
#[inline]
23+
fn is_digit(self, radix: u32) -> bool {
24+
self.to_digit(radix).is_some()
25+
}
4726

48-
#[stable(feature = "rust1", since = "1.0.0")]
49-
impl Iterator for ToLowercase {
50-
type Item = char;
51-
fn next(&mut self) -> Option<char> {
52-
self.0.next()
27+
#[inline]
28+
fn to_digit(self, radix: u32) -> Option<u32> {
29+
if radix > 36 {
30+
panic!("to_digit: radix is too high (maximum 36)");
31+
}
32+
let val = match self {
33+
'0' ... '9' => self as u32 - '0' as u32,
34+
'a' ... 'z' => self as u32 - 'a' as u32 + 10,
35+
'A' ... 'Z' => self as u32 - 'A' as u32 + 10,
36+
_ => return None,
37+
};
38+
if val < radix { Some(val) }
39+
else { None }
5340
}
54-
}
5541

56-
#[stable(feature = "fused", since = "1.26.0")]
57-
impl FusedIterator for ToLowercase {}
42+
#[inline]
43+
fn escape_unicode(self) -> EscapeUnicode {
44+
let c = self as u32;
5845

59-
/// Returns an iterator that yields the uppercase equivalent of a `char`.
60-
///
61-
/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
62-
/// its documentation for more.
63-
///
64-
/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase
65-
/// [`char`]: ../../std/primitive.char.html
66-
#[stable(feature = "rust1", since = "1.0.0")]
67-
#[derive(Debug, Clone)]
68-
pub struct ToUppercase(CaseMappingIter);
46+
// or-ing 1 ensures that for c==0 the code computes that one
47+
// digit should be printed and (which is the same) avoids the
48+
// (31 - 32) underflow
49+
let msb = 31 - (c | 1).leading_zeros();
6950

70-
#[stable(feature = "rust1", since = "1.0.0")]
71-
impl Iterator for ToUppercase {
72-
type Item = char;
73-
fn next(&mut self) -> Option<char> {
74-
self.0.next()
51+
// the index of the most significant hex digit
52+
let ms_hex_digit = msb / 4;
53+
EscapeUnicode {
54+
c: self,
55+
state: EscapeUnicodeState::Backslash,
56+
hex_digit_idx: ms_hex_digit as usize,
57+
}
7558
}
76-
}
7759

78-
#[stable(feature = "fused", since = "1.26.0")]
79-
impl FusedIterator for ToUppercase {}
60+
#[inline]
61+
fn escape_default(self) -> EscapeDefault {
62+
let init_state = match self {
63+
'\t' => EscapeDefaultState::Backslash('t'),
64+
'\r' => EscapeDefaultState::Backslash('r'),
65+
'\n' => EscapeDefaultState::Backslash('n'),
66+
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
67+
'\x20' ... '\x7e' => EscapeDefaultState::Char(self),
68+
_ => EscapeDefaultState::Unicode(self.escape_unicode())
69+
};
70+
EscapeDefault { state: init_state }
71+
}
8072

81-
#[derive(Debug, Clone)]
82-
enum CaseMappingIter {
83-
Three(char, char, char),
84-
Two(char, char),
85-
One(char),
86-
Zero,
87-
}
73+
#[inline]
74+
fn escape_debug(self) -> EscapeDebug {
75+
let init_state = match self {
76+
'\t' => EscapeDefaultState::Backslash('t'),
77+
'\r' => EscapeDefaultState::Backslash('r'),
78+
'\n' => EscapeDefaultState::Backslash('n'),
79+
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
80+
c if is_printable(c) => EscapeDefaultState::Char(c),
81+
c => EscapeDefaultState::Unicode(c.escape_unicode()),
82+
};
83+
EscapeDebug(EscapeDefault { state: init_state })
84+
}
8885

89-
impl CaseMappingIter {
90-
fn new(chars: [char; 3]) -> CaseMappingIter {
91-
if chars[2] == '\0' {
92-
if chars[1] == '\0' {
93-
CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
94-
} else {
95-
CaseMappingIter::Two(chars[0], chars[1])
96-
}
86+
#[inline]
87+
fn len_utf8(self) -> usize {
88+
let code = self as u32;
89+
if code < MAX_ONE_B {
90+
1
91+
} else if code < MAX_TWO_B {
92+
2
93+
} else if code < MAX_THREE_B {
94+
3
9795
} else {
98-
CaseMappingIter::Three(chars[0], chars[1], chars[2])
96+
4
9997
}
10098
}
101-
}
10299

103-
impl Iterator for CaseMappingIter {
104-
type Item = char;
105-
fn next(&mut self) -> Option<char> {
106-
match *self {
107-
CaseMappingIter::Three(a, b, c) => {
108-
*self = CaseMappingIter::Two(b, c);
109-
Some(a)
110-
}
111-
CaseMappingIter::Two(b, c) => {
112-
*self = CaseMappingIter::One(c);
113-
Some(b)
114-
}
115-
CaseMappingIter::One(c) => {
116-
*self = CaseMappingIter::Zero;
117-
Some(c)
118-
}
119-
CaseMappingIter::Zero => None,
120-
}
100+
#[inline]
101+
fn len_utf16(self) -> usize {
102+
let ch = self as u32;
103+
if (ch & 0xFFFF) == ch { 1 } else { 2 }
121104
}
122-
}
123105

124-
impl fmt::Display for CaseMappingIter {
125-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126-
match *self {
127-
CaseMappingIter::Three(a, b, c) => {
128-
f.write_char(a)?;
129-
f.write_char(b)?;
130-
f.write_char(c)
131-
}
132-
CaseMappingIter::Two(b, c) => {
133-
f.write_char(b)?;
134-
f.write_char(c)
135-
}
136-
CaseMappingIter::One(c) => {
137-
f.write_char(c)
138-
}
139-
CaseMappingIter::Zero => Ok(()),
106+
#[inline]
107+
fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
108+
let code = self as u32;
109+
unsafe {
110+
let len =
111+
if code < MAX_ONE_B && !dst.is_empty() {
112+
*dst.get_unchecked_mut(0) = code as u8;
113+
1
114+
} else if code < MAX_TWO_B && dst.len() >= 2 {
115+
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
116+
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
117+
2
118+
} else if code < MAX_THREE_B && dst.len() >= 3 {
119+
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
120+
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
121+
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
122+
3
123+
} else if dst.len() >= 4 {
124+
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
125+
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
126+
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
127+
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
128+
4
129+
} else {
130+
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
131+
from_u32_unchecked(code).len_utf8(),
132+
code,
133+
dst.len())
134+
};
135+
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
140136
}
141137
}
142-
}
143-
144-
#[stable(feature = "char_struct_display", since = "1.16.0")]
145-
impl fmt::Display for ToLowercase {
146-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
147-
fmt::Display::fmt(&self.0, f)
148-
}
149-
}
150138

151-
#[stable(feature = "char_struct_display", since = "1.16.0")]
152-
impl fmt::Display for ToUppercase {
153-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
154-
fmt::Display::fmt(&self.0, f)
139+
#[inline]
140+
fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
141+
let mut code = self as u32;
142+
unsafe {
143+
if (code & 0xFFFF) == code && !dst.is_empty() {
144+
// The BMP falls through (assuming non-surrogate, as it should)
145+
*dst.get_unchecked_mut(0) = code as u16;
146+
slice::from_raw_parts_mut(dst.as_mut_ptr(), 1)
147+
} else if dst.len() >= 2 {
148+
// Supplementary planes break into surrogates.
149+
code -= 0x1_0000;
150+
*dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16);
151+
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
152+
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
153+
} else {
154+
panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
155+
from_u32_unchecked(code).len_utf16(),
156+
code,
157+
dst.len())
158+
}
159+
}
155160
}
156161
}
157162

0 commit comments

Comments
 (0)