|
8 | 8 | // option. This file may not be copied, modified, or distributed
|
9 | 9 | // except according to those terms.
|
10 | 10 |
|
11 |
| -//! A character type. |
12 |
| -//! |
13 |
| -//! The `char` type represents a single character. More specifically, since |
14 |
| -//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode |
15 |
| -//! scalar value]', which is similar to, but not the same as, a '[Unicode code |
16 |
| -//! point]'. |
17 |
| -//! |
18 |
| -//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value |
19 |
| -//! [Unicode code point]: http://www.unicode.org/glossary/#code_point |
20 |
| -//! |
21 |
| -//! This module exists for technical reasons, the primary documentation for |
22 |
| -//! `char` is directly on [the `char` primitive type](../../std/primitive.char.html) |
23 |
| -//! itself. |
24 |
| -//! |
25 |
| -//! This module is the home of the iterator implementations for the iterators |
26 |
| -//! implemented on `char`, as well as some useful constants and conversion |
27 |
| -//! functions that convert various types to `char`. |
| 11 | +//! impl char {} |
28 | 12 |
|
29 |
| -#![stable(feature = "rust1", since = "1.0.0")] |
30 |
| - |
31 |
| -use char::*; |
32 |
| -use char::CharExt as C; |
33 |
| -use iter::FusedIterator; |
34 |
| -use fmt::{self, Write}; |
| 13 | +use slice; |
| 14 | +use str::from_utf8_unchecked_mut; |
| 15 | +use super::*; |
| 16 | +use super::CharExt as C; |
| 17 | +use super::printable::is_printable; |
35 | 18 | use unicode::tables::{conversions, derived_property, general_category, property};
|
36 | 19 |
|
37 |
| -/// Returns an iterator that yields the lowercase equivalent of a `char`. |
38 |
| -/// |
39 |
| -/// This `struct` is created by the [`to_lowercase`] method on [`char`]. See |
40 |
| -/// its documentation for more. |
41 |
| -/// |
42 |
| -/// [`to_lowercase`]: ../../std/primitive.char.html#method.to_lowercase |
43 |
| -/// [`char`]: ../../std/primitive.char.html |
44 |
| -#[stable(feature = "rust1", since = "1.0.0")] |
45 |
| -#[derive(Debug, Clone)] |
46 |
| -pub struct ToLowercase(CaseMappingIter); |
| 20 | +#[stable(feature = "core", since = "1.6.0")] |
| 21 | +impl CharExt for char { |
| 22 | + #[inline] |
| 23 | + fn is_digit(self, radix: u32) -> bool { |
| 24 | + self.to_digit(radix).is_some() |
| 25 | + } |
47 | 26 |
|
48 |
| -#[stable(feature = "rust1", since = "1.0.0")] |
49 |
| -impl Iterator for ToLowercase { |
50 |
| - type Item = char; |
51 |
| - fn next(&mut self) -> Option<char> { |
52 |
| - self.0.next() |
| 27 | + #[inline] |
| 28 | + fn to_digit(self, radix: u32) -> Option<u32> { |
| 29 | + if radix > 36 { |
| 30 | + panic!("to_digit: radix is too high (maximum 36)"); |
| 31 | + } |
| 32 | + let val = match self { |
| 33 | + '0' ... '9' => self as u32 - '0' as u32, |
| 34 | + 'a' ... 'z' => self as u32 - 'a' as u32 + 10, |
| 35 | + 'A' ... 'Z' => self as u32 - 'A' as u32 + 10, |
| 36 | + _ => return None, |
| 37 | + }; |
| 38 | + if val < radix { Some(val) } |
| 39 | + else { None } |
53 | 40 | }
|
54 |
| -} |
55 | 41 |
|
56 |
| -#[stable(feature = "fused", since = "1.26.0")] |
57 |
| -impl FusedIterator for ToLowercase {} |
| 42 | + #[inline] |
| 43 | + fn escape_unicode(self) -> EscapeUnicode { |
| 44 | + let c = self as u32; |
58 | 45 |
|
59 |
| -/// Returns an iterator that yields the uppercase equivalent of a `char`. |
60 |
| -/// |
61 |
| -/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See |
62 |
| -/// its documentation for more. |
63 |
| -/// |
64 |
| -/// [`to_uppercase`]: ../../std/primitive.char.html#method.to_uppercase |
65 |
| -/// [`char`]: ../../std/primitive.char.html |
66 |
| -#[stable(feature = "rust1", since = "1.0.0")] |
67 |
| -#[derive(Debug, Clone)] |
68 |
| -pub struct ToUppercase(CaseMappingIter); |
| 46 | + // or-ing 1 ensures that for c==0 the code computes that one |
| 47 | + // digit should be printed and (which is the same) avoids the |
| 48 | + // (31 - 32) underflow |
| 49 | + let msb = 31 - (c | 1).leading_zeros(); |
69 | 50 |
|
70 |
| -#[stable(feature = "rust1", since = "1.0.0")] |
71 |
| -impl Iterator for ToUppercase { |
72 |
| - type Item = char; |
73 |
| - fn next(&mut self) -> Option<char> { |
74 |
| - self.0.next() |
| 51 | + // the index of the most significant hex digit |
| 52 | + let ms_hex_digit = msb / 4; |
| 53 | + EscapeUnicode { |
| 54 | + c: self, |
| 55 | + state: EscapeUnicodeState::Backslash, |
| 56 | + hex_digit_idx: ms_hex_digit as usize, |
| 57 | + } |
75 | 58 | }
|
76 |
| -} |
77 | 59 |
|
78 |
| -#[stable(feature = "fused", since = "1.26.0")] |
79 |
| -impl FusedIterator for ToUppercase {} |
| 60 | + #[inline] |
| 61 | + fn escape_default(self) -> EscapeDefault { |
| 62 | + let init_state = match self { |
| 63 | + '\t' => EscapeDefaultState::Backslash('t'), |
| 64 | + '\r' => EscapeDefaultState::Backslash('r'), |
| 65 | + '\n' => EscapeDefaultState::Backslash('n'), |
| 66 | + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), |
| 67 | + '\x20' ... '\x7e' => EscapeDefaultState::Char(self), |
| 68 | + _ => EscapeDefaultState::Unicode(self.escape_unicode()) |
| 69 | + }; |
| 70 | + EscapeDefault { state: init_state } |
| 71 | + } |
80 | 72 |
|
81 |
| -#[derive(Debug, Clone)] |
82 |
| -enum CaseMappingIter { |
83 |
| - Three(char, char, char), |
84 |
| - Two(char, char), |
85 |
| - One(char), |
86 |
| - Zero, |
87 |
| -} |
| 73 | + #[inline] |
| 74 | + fn escape_debug(self) -> EscapeDebug { |
| 75 | + let init_state = match self { |
| 76 | + '\t' => EscapeDefaultState::Backslash('t'), |
| 77 | + '\r' => EscapeDefaultState::Backslash('r'), |
| 78 | + '\n' => EscapeDefaultState::Backslash('n'), |
| 79 | + '\\' | '\'' | '"' => EscapeDefaultState::Backslash(self), |
| 80 | + c if is_printable(c) => EscapeDefaultState::Char(c), |
| 81 | + c => EscapeDefaultState::Unicode(c.escape_unicode()), |
| 82 | + }; |
| 83 | + EscapeDebug(EscapeDefault { state: init_state }) |
| 84 | + } |
88 | 85 |
|
89 |
| -impl CaseMappingIter { |
90 |
| - fn new(chars: [char; 3]) -> CaseMappingIter { |
91 |
| - if chars[2] == '\0' { |
92 |
| - if chars[1] == '\0' { |
93 |
| - CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0' |
94 |
| - } else { |
95 |
| - CaseMappingIter::Two(chars[0], chars[1]) |
96 |
| - } |
| 86 | + #[inline] |
| 87 | + fn len_utf8(self) -> usize { |
| 88 | + let code = self as u32; |
| 89 | + if code < MAX_ONE_B { |
| 90 | + 1 |
| 91 | + } else if code < MAX_TWO_B { |
| 92 | + 2 |
| 93 | + } else if code < MAX_THREE_B { |
| 94 | + 3 |
97 | 95 | } else {
|
98 |
| - CaseMappingIter::Three(chars[0], chars[1], chars[2]) |
| 96 | + 4 |
99 | 97 | }
|
100 | 98 | }
|
101 |
| -} |
102 | 99 |
|
103 |
| -impl Iterator for CaseMappingIter { |
104 |
| - type Item = char; |
105 |
| - fn next(&mut self) -> Option<char> { |
106 |
| - match *self { |
107 |
| - CaseMappingIter::Three(a, b, c) => { |
108 |
| - *self = CaseMappingIter::Two(b, c); |
109 |
| - Some(a) |
110 |
| - } |
111 |
| - CaseMappingIter::Two(b, c) => { |
112 |
| - *self = CaseMappingIter::One(c); |
113 |
| - Some(b) |
114 |
| - } |
115 |
| - CaseMappingIter::One(c) => { |
116 |
| - *self = CaseMappingIter::Zero; |
117 |
| - Some(c) |
118 |
| - } |
119 |
| - CaseMappingIter::Zero => None, |
120 |
| - } |
| 100 | + #[inline] |
| 101 | + fn len_utf16(self) -> usize { |
| 102 | + let ch = self as u32; |
| 103 | + if (ch & 0xFFFF) == ch { 1 } else { 2 } |
121 | 104 | }
|
122 |
| -} |
123 | 105 |
|
124 |
| -impl fmt::Display for CaseMappingIter { |
125 |
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
126 |
| - match *self { |
127 |
| - CaseMappingIter::Three(a, b, c) => { |
128 |
| - f.write_char(a)?; |
129 |
| - f.write_char(b)?; |
130 |
| - f.write_char(c) |
131 |
| - } |
132 |
| - CaseMappingIter::Two(b, c) => { |
133 |
| - f.write_char(b)?; |
134 |
| - f.write_char(c) |
135 |
| - } |
136 |
| - CaseMappingIter::One(c) => { |
137 |
| - f.write_char(c) |
138 |
| - } |
139 |
| - CaseMappingIter::Zero => Ok(()), |
| 106 | + #[inline] |
| 107 | + fn encode_utf8(self, dst: &mut [u8]) -> &mut str { |
| 108 | + let code = self as u32; |
| 109 | + unsafe { |
| 110 | + let len = |
| 111 | + if code < MAX_ONE_B && !dst.is_empty() { |
| 112 | + *dst.get_unchecked_mut(0) = code as u8; |
| 113 | + 1 |
| 114 | + } else if code < MAX_TWO_B && dst.len() >= 2 { |
| 115 | + *dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; |
| 116 | + *dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT; |
| 117 | + 2 |
| 118 | + } else if code < MAX_THREE_B && dst.len() >= 3 { |
| 119 | + *dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; |
| 120 | + *dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 121 | + *dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT; |
| 122 | + 3 |
| 123 | + } else if dst.len() >= 4 { |
| 124 | + *dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; |
| 125 | + *dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT; |
| 126 | + *dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT; |
| 127 | + *dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT; |
| 128 | + 4 |
| 129 | + } else { |
| 130 | + panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}", |
| 131 | + from_u32_unchecked(code).len_utf8(), |
| 132 | + code, |
| 133 | + dst.len()) |
| 134 | + }; |
| 135 | + from_utf8_unchecked_mut(dst.get_unchecked_mut(..len)) |
140 | 136 | }
|
141 | 137 | }
|
142 |
| -} |
143 |
| - |
144 |
| -#[stable(feature = "char_struct_display", since = "1.16.0")] |
145 |
| -impl fmt::Display for ToLowercase { |
146 |
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
147 |
| - fmt::Display::fmt(&self.0, f) |
148 |
| - } |
149 |
| -} |
150 | 138 |
|
151 |
| -#[stable(feature = "char_struct_display", since = "1.16.0")] |
152 |
| -impl fmt::Display for ToUppercase { |
153 |
| - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
154 |
| - fmt::Display::fmt(&self.0, f) |
| 139 | + #[inline] |
| 140 | + fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] { |
| 141 | + let mut code = self as u32; |
| 142 | + unsafe { |
| 143 | + if (code & 0xFFFF) == code && !dst.is_empty() { |
| 144 | + // The BMP falls through (assuming non-surrogate, as it should) |
| 145 | + *dst.get_unchecked_mut(0) = code as u16; |
| 146 | + slice::from_raw_parts_mut(dst.as_mut_ptr(), 1) |
| 147 | + } else if dst.len() >= 2 { |
| 148 | + // Supplementary planes break into surrogates. |
| 149 | + code -= 0x1_0000; |
| 150 | + *dst.get_unchecked_mut(0) = 0xD800 | ((code >> 10) as u16); |
| 151 | + *dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF); |
| 152 | + slice::from_raw_parts_mut(dst.as_mut_ptr(), 2) |
| 153 | + } else { |
| 154 | + panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}", |
| 155 | + from_u32_unchecked(code).len_utf16(), |
| 156 | + code, |
| 157 | + dst.len()) |
| 158 | + } |
| 159 | + } |
155 | 160 | }
|
156 | 161 | }
|
157 | 162 |
|
|
0 commit comments