Skip to content

Commit 68f89fc

Browse files
committed
Make std::char functions and constants associated to char.
1 parent e5f35df commit 68f89fc

File tree

2 files changed

+242
-2
lines changed

2 files changed

+242
-2
lines changed

src/libcore/char/methods.rs

+240
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,246 @@ use super::*;
99

1010
#[lang = "char"]
1111
impl char {
12+
/// The highest valid code point a `char` can have.
13+
///
14+
/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
15+
/// Point], but only ones within a certain range. `MAX` is the highest valid
16+
/// code point that's a valid [Unicode Scalar Value].
17+
///
18+
/// [`char`]: primitive.char.html
19+
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
20+
/// [Code Point]: http://www.unicode.org/glossary/#code_point
21+
#[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
22+
// Canonical definition of the value: the module-level `MAX` in `char/mod.rs`
// is defined as `char::MAX`, i.e. in terms of this constant.
pub const MAX: char = '\u{10ffff}';
23+
24+
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
25+
/// decoding error.
26+
///
27+
/// It can occur, for example, when giving ill-formed UTF-8 bytes to
28+
/// [`String::from_utf8_lossy`](string/struct.String.html#method.from_utf8_lossy).
29+
#[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
30+
// Canonical definition of the value: the module-level `REPLACEMENT_CHARACTER`
// in `char/mod.rs` is defined as `char::REPLACEMENT_CHARACTER`.
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
31+
32+
/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
33+
/// `char` and `str` methods are based on.
34+
///
35+
/// New versions of Unicode are released regularly and subsequently all methods
36+
/// in the standard library depending on Unicode are updated. Therefore the
37+
/// behavior of some `char` and `str` methods and the value of this constant
38+
/// change over time. This is *not* considered to be a breaking change.
39+
///
40+
/// The version numbering scheme is explained in
41+
/// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
42+
#[unstable(feature = "assoc_char_consts", reason = "recently added", issue = "71763")]
43+
// Not a literal: the tuple is forwarded from `crate::unicode`, so the value
// only has to be maintained in one place.
pub const UNICODE_VERSION: (u8, u8, u8) = crate::unicode::UNICODE_VERSION;
44+
45+
/// Creates an iterator over the UTF-16 encoded code points in `iter`,
46+
/// returning unpaired surrogates as `Err`s.
47+
///
48+
/// # Examples
49+
///
50+
/// Basic usage:
51+
///
52+
/// ```
53+
/// use std::char::decode_utf16;
54+
///
55+
/// // 𝄞mus<invalid>ic<invalid>
56+
/// let v = [
57+
/// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
58+
/// ];
59+
///
60+
/// assert_eq!(
61+
/// decode_utf16(v.iter().cloned())
62+
/// .map(|r| r.map_err(|e| e.unpaired_surrogate()))
63+
/// .collect::<Vec<_>>(),
64+
/// vec![
65+
/// Ok('𝄞'),
66+
/// Ok('m'), Ok('u'), Ok('s'),
67+
/// Err(0xDD1E),
68+
/// Ok('i'), Ok('c'),
69+
/// Err(0xD834)
70+
/// ]
71+
/// );
72+
/// ```
73+
///
74+
/// A lossy decoder can be obtained by replacing `Err` results with the replacement character:
75+
///
76+
/// ```
77+
/// use std::char::{decode_utf16, REPLACEMENT_CHARACTER};
78+
///
79+
/// // 𝄞mus<invalid>ic<invalid>
80+
/// let v = [
81+
/// 0xD834, 0xDD1E, 0x006d, 0x0075, 0x0073, 0xDD1E, 0x0069, 0x0063, 0xD834,
82+
/// ];
83+
///
84+
/// assert_eq!(
85+
/// decode_utf16(v.iter().cloned())
86+
/// .map(|r| r.unwrap_or(REPLACEMENT_CHARACTER))
87+
/// .collect::<String>(),
88+
/// "𝄞mus�ic�"
89+
/// );
90+
/// ```
91+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
92+
#[inline]
93+
pub fn decode_utf16<I: IntoIterator<Item = u16>>(iter: I) -> DecodeUtf16<I::IntoIter> {
94+
super::decode::decode_utf16(iter)
95+
}
96+
97+
/// Converts a `u32` to a `char`.
98+
///
99+
/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
100+
/// `as`:
101+
///
102+
/// ```
103+
/// let c = '💯';
104+
/// let i = c as u32;
105+
///
106+
/// assert_eq!(128175, i);
107+
/// ```
108+
///
109+
/// However, the reverse is not true: not all valid [`u32`]s are valid
110+
/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
111+
/// for a [`char`].
112+
///
113+
/// [`char`]: primitive.char.html
114+
/// [`u32`]: primitive.u32.html
115+
///
116+
/// For an unsafe version of this function which ignores these checks, see
117+
/// [`from_u32_unchecked`].
118+
///
119+
/// [`from_u32_unchecked`]: #method.from_u32_unchecked
120+
///
121+
/// # Examples
122+
///
123+
/// Basic usage:
124+
///
125+
/// ```
126+
/// use std::char;
127+
///
128+
/// let c = char::from_u32(0x2764);
129+
///
130+
/// assert_eq!(Some('❤'), c);
131+
/// ```
132+
///
133+
/// Returning `None` when the input is not a valid [`char`]:
134+
///
135+
/// ```
136+
/// use std::char;
137+
///
138+
/// let c = char::from_u32(0x110000);
139+
///
140+
/// assert_eq!(None, c);
141+
/// ```
142+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
143+
#[inline]
144+
pub fn from_u32(i: u32) -> Option<char> {
145+
// Thin wrapper: no validation happens here, the argument is handed
// unchanged to the free function in `super::convert`.
super::convert::from_u32(i)
146+
}
147+
148+
/// Converts a `u32` to a `char`, ignoring validity.
149+
///
150+
/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with
151+
/// `as`:
152+
///
153+
/// ```
154+
/// let c = '💯';
155+
/// let i = c as u32;
156+
///
157+
/// assert_eq!(128175, i);
158+
/// ```
159+
///
160+
/// However, the reverse is not true: not all valid [`u32`]s are valid
161+
/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
162+
/// [`char`], possibly creating an invalid one.
163+
///
164+
/// [`char`]: primitive.char.html
165+
/// [`u32`]: primitive.u32.html
166+
///
167+
/// # Safety
168+
///
169+
/// This function is unsafe, as it may construct invalid `char` values.
170+
///
171+
/// For a safe version of this function, see the [`from_u32`] function.
172+
///
173+
/// [`from_u32`]: #method.from_u32
174+
///
175+
/// # Examples
176+
///
177+
/// Basic usage:
178+
///
179+
/// ```
180+
/// use std::char;
181+
///
182+
/// let c = unsafe { char::from_u32_unchecked(0x2764) };
183+
///
184+
/// assert_eq!('❤', c);
185+
/// ```
186+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
187+
#[inline]
188+
pub unsafe fn from_u32_unchecked(i: u32) -> char {
189+
// SAFETY: nothing is checked in this wrapper; `i` is forwarded as-is to the
// unsafe free function in `super::convert`, so the obligation to pass a
// valid `char` value rests entirely with the caller.
super::convert::from_u32_unchecked(i)
190+
}
191+
192+
/// Converts a digit in the given radix to a `char`.
193+
///
194+
/// A 'radix' here is sometimes also called a 'base'. A radix of two
195+
/// indicates a binary number, a radix of ten, decimal, and a radix of
196+
/// sixteen, hexadecimal, to give some common values. Arbitrary
197+
/// radices are supported.
198+
///
199+
/// `from_digit()` will return `None` if the input is not a digit in
200+
/// the given radix.
201+
///
202+
/// # Panics
203+
///
204+
/// Panics if given a radix larger than 36.
205+
///
206+
/// # Examples
207+
///
208+
/// Basic usage:
209+
///
210+
/// ```
211+
/// use std::char;
212+
///
213+
/// let c = char::from_digit(4, 10);
214+
///
215+
/// assert_eq!(Some('4'), c);
216+
///
217+
/// // Decimal 11 is a single digit in base 16
218+
/// let c = char::from_digit(11, 16);
219+
///
220+
/// assert_eq!(Some('b'), c);
221+
/// ```
222+
///
223+
/// Returning `None` when the input is not a digit:
224+
///
225+
/// ```
226+
/// use std::char;
227+
///
228+
/// let c = char::from_digit(20, 10);
229+
///
230+
/// assert_eq!(None, c);
231+
/// ```
232+
///
233+
/// Passing a large radix, causing a panic:
234+
///
235+
/// ```
236+
/// use std::thread;
237+
/// use std::char;
238+
///
239+
/// let result = thread::spawn(|| {
240+
/// // this panics
241+
/// let c = char::from_digit(1, 37);
242+
/// }).join();
243+
///
244+
/// assert!(result.is_err());
245+
/// ```
246+
#[unstable(feature = "assoc_char_funcs", reason = "recently added", issue = "71763")]
247+
#[inline]
248+
pub fn from_digit(num: u32, radix: u32) -> Option<char> {
249+
// Thin wrapper: both arguments are forwarded unchanged to the free function
// in `super::convert`; no radix validation is done in this wrapper itself.
super::convert::from_digit(num, radix)
250+
}
251+
12252
/// Checks if a `char` is a digit in the given radix.
13253
///
14254
/// A 'radix' here is sometimes also called a 'base'. A radix of two

src/libcore/char/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -92,15 +92,15 @@ const MAX_THREE_B: u32 = 0x10000;
9292
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
9393
/// [Code Point]: http://www.unicode.org/glossary/#code_point
9494
#[stable(feature = "rust1", since = "1.0.0")]
95-
pub const MAX: char = '\u{10ffff}';
95+
pub const MAX: char = char::MAX;
9696

9797
/// `U+FFFD REPLACEMENT CHARACTER` (�) is used in Unicode to represent a
9898
/// decoding error.
9999
///
100100
/// It can occur, for example, when giving ill-formed UTF-8 bytes to
101101
/// [`String::from_utf8_lossy`](../../std/string/struct.String.html#method.from_utf8_lossy).
102102
#[stable(feature = "decode_utf16", since = "1.9.0")]
103-
pub const REPLACEMENT_CHARACTER: char = '\u{FFFD}';
103+
pub const REPLACEMENT_CHARACTER: char = char::REPLACEMENT_CHARACTER;
104104

105105
/// Returns an iterator that yields the hexadecimal Unicode escape of a
106106
/// character, as `char`s.

0 commit comments

Comments
 (0)