|
| 1 | +// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
| 2 | +// file at the top-level directory of this distribution and at |
| 3 | +// http://rust-lang.org/COPYRIGHT. |
| 4 | +// |
| 5 | +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 6 | +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 7 | +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 8 | +// option. This file may not be copied, modified, or distributed |
| 9 | +// except according to those terms. |
| 10 | + |
| 11 | +//! Character conversions. |
| 12 | +
|
| 13 | +use convert::TryFrom; |
| 14 | +use fmt; |
| 15 | +use mem::transmute; |
| 16 | +use str::FromStr; |
| 17 | +use super::MAX; |
| 18 | + |
| 19 | +/// Converts a `u32` to a `char`. |
| 20 | +/// |
| 21 | +/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with |
| 22 | +/// [`as`]: |
| 23 | +/// |
| 24 | +/// ``` |
| 25 | +/// let c = '💯'; |
| 26 | +/// let i = c as u32; |
| 27 | +/// |
| 28 | +/// assert_eq!(128175, i); |
| 29 | +/// ``` |
| 30 | +/// |
| 31 | +/// However, the reverse is not true: not all valid [`u32`]s are valid |
| 32 | +/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value |
| 33 | +/// for a [`char`]. |
| 34 | +/// |
| 35 | +/// [`char`]: ../../std/primitive.char.html |
| 36 | +/// [`u32`]: ../../std/primitive.u32.html |
| 37 | +/// [`as`]: ../../book/first-edition/casting-between-types.html#as |
| 38 | +/// |
| 39 | +/// For an unsafe version of this function which ignores these checks, see |
| 40 | +/// [`from_u32_unchecked`]. |
| 41 | +/// |
| 42 | +/// [`from_u32_unchecked`]: fn.from_u32_unchecked.html |
| 43 | +/// |
| 44 | +/// # Examples |
| 45 | +/// |
| 46 | +/// Basic usage: |
| 47 | +/// |
| 48 | +/// ``` |
| 49 | +/// use std::char; |
| 50 | +/// |
| 51 | +/// let c = char::from_u32(0x2764); |
| 52 | +/// |
| 53 | +/// assert_eq!(Some('❤'), c); |
| 54 | +/// ``` |
| 55 | +/// |
| 56 | +/// Returning `None` when the input is not a valid [`char`]: |
| 57 | +/// |
| 58 | +/// ``` |
| 59 | +/// use std::char; |
| 60 | +/// |
| 61 | +/// let c = char::from_u32(0x110000); |
| 62 | +/// |
| 63 | +/// assert_eq!(None, c); |
| 64 | +/// ``` |
| 65 | +#[inline] |
| 66 | +#[stable(feature = "rust1", since = "1.0.0")] |
| 67 | +pub fn from_u32(i: u32) -> Option<char> { |
| 68 | + char::try_from(i).ok() |
| 69 | +} |
| 70 | + |
| 71 | +/// Converts a `u32` to a `char`, ignoring validity. |
| 72 | +/// |
| 73 | +/// Note that all [`char`]s are valid [`u32`]s, and can be cast to one with |
| 74 | +/// [`as`]: |
| 75 | +/// |
| 76 | +/// ``` |
| 77 | +/// let c = '💯'; |
| 78 | +/// let i = c as u32; |
| 79 | +/// |
| 80 | +/// assert_eq!(128175, i); |
| 81 | +/// ``` |
| 82 | +/// |
| 83 | +/// However, the reverse is not true: not all valid [`u32`]s are valid |
| 84 | +/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to |
| 85 | +/// [`char`], possibly creating an invalid one. |
| 86 | +/// |
| 87 | +/// [`char`]: ../../std/primitive.char.html |
| 88 | +/// [`u32`]: ../../std/primitive.u32.html |
| 89 | +/// [`as`]: ../../book/first-edition/casting-between-types.html#as |
| 90 | +/// |
| 91 | +/// # Safety |
| 92 | +/// |
| 93 | +/// This function is unsafe, as it may construct invalid `char` values. |
| 94 | +/// |
| 95 | +/// For a safe version of this function, see the [`from_u32`] function. |
| 96 | +/// |
| 97 | +/// [`from_u32`]: fn.from_u32.html |
| 98 | +/// |
| 99 | +/// # Examples |
| 100 | +/// |
| 101 | +/// Basic usage: |
| 102 | +/// |
| 103 | +/// ``` |
| 104 | +/// use std::char; |
| 105 | +/// |
| 106 | +/// let c = unsafe { char::from_u32_unchecked(0x2764) }; |
| 107 | +/// |
| 108 | +/// assert_eq!('❤', c); |
| 109 | +/// ``` |
| 110 | +#[inline] |
| 111 | +#[stable(feature = "char_from_unchecked", since = "1.5.0")] |
| 112 | +pub unsafe fn from_u32_unchecked(i: u32) -> char { |
| 113 | + transmute(i) |
| 114 | +} |
| 115 | + |
| 116 | +#[stable(feature = "char_convert", since = "1.13.0")] |
| 117 | +impl From<char> for u32 { |
| 118 | + #[inline] |
| 119 | + fn from(c: char) -> Self { |
| 120 | + c as u32 |
| 121 | + } |
| 122 | +} |
| 123 | + |
| 124 | +/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF. |
| 125 | +/// |
| 126 | +/// Unicode is designed such that this effectively decodes bytes |
| 127 | +/// with the character encoding that IANA calls ISO-8859-1. |
| 128 | +/// This encoding is compatible with ASCII. |
| 129 | +/// |
| 130 | +/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen), |
| 131 | +/// which leaves some "blanks", byte values that are not assigned to any character. |
| 132 | +/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes. |
| 133 | +/// |
| 134 | +/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252, |
| 135 | +/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks |
| 136 | +/// to punctuation and various Latin characters. |
| 137 | +/// |
| 138 | +/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/) |
| 139 | +/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases |
| 140 | +/// for a superset of Windows-1252 that fills the remaining blanks with corresponding |
| 141 | +/// C0 and C1 control codes. |
| 142 | +#[stable(feature = "char_convert", since = "1.13.0")] |
| 143 | +impl From<u8> for char { |
| 144 | + #[inline] |
| 145 | + fn from(i: u8) -> Self { |
| 146 | + i as char |
| 147 | + } |
| 148 | +} |
| 149 | + |
| 150 | + |
| 151 | +/// An error which can be returned when parsing a char. |
| 152 | +#[stable(feature = "char_from_str", since = "1.20.0")] |
| 153 | +#[derive(Clone, Debug, PartialEq, Eq)] |
| 154 | +pub struct ParseCharError { |
| 155 | + kind: CharErrorKind, |
| 156 | +} |
| 157 | + |
| 158 | +impl ParseCharError { |
| 159 | + #[unstable(feature = "char_error_internals", |
| 160 | + reason = "this method should not be available publicly", |
| 161 | + issue = "0")] |
| 162 | + #[doc(hidden)] |
| 163 | + pub fn __description(&self) -> &str { |
| 164 | + match self.kind { |
| 165 | + CharErrorKind::EmptyString => { |
| 166 | + "cannot parse char from empty string" |
| 167 | + }, |
| 168 | + CharErrorKind::TooManyChars => "too many characters in string" |
| 169 | + } |
| 170 | + } |
| 171 | +} |
| 172 | + |
| 173 | +#[derive(Copy, Clone, Debug, PartialEq, Eq)] |
| 174 | +enum CharErrorKind { |
| 175 | + EmptyString, |
| 176 | + TooManyChars, |
| 177 | +} |
| 178 | + |
| 179 | +#[stable(feature = "char_from_str", since = "1.20.0")] |
| 180 | +impl fmt::Display for ParseCharError { |
| 181 | + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 182 | + self.__description().fmt(f) |
| 183 | + } |
| 184 | +} |
| 185 | + |
| 186 | + |
| 187 | +#[stable(feature = "char_from_str", since = "1.20.0")] |
| 188 | +impl FromStr for char { |
| 189 | + type Err = ParseCharError; |
| 190 | + |
| 191 | + #[inline] |
| 192 | + fn from_str(s: &str) -> Result<Self, Self::Err> { |
| 193 | + let mut chars = s.chars(); |
| 194 | + match (chars.next(), chars.next()) { |
| 195 | + (None, _) => { |
| 196 | + Err(ParseCharError { kind: CharErrorKind::EmptyString }) |
| 197 | + }, |
| 198 | + (Some(c), None) => Ok(c), |
| 199 | + _ => { |
| 200 | + Err(ParseCharError { kind: CharErrorKind::TooManyChars }) |
| 201 | + } |
| 202 | + } |
| 203 | + } |
| 204 | +} |
| 205 | + |
| 206 | + |
| 207 | +#[stable(feature = "try_from", since = "1.26.0")] |
| 208 | +impl TryFrom<u32> for char { |
| 209 | + type Error = CharTryFromError; |
| 210 | + |
| 211 | + #[inline] |
| 212 | + fn try_from(i: u32) -> Result<Self, Self::Error> { |
| 213 | + if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) { |
| 214 | + Err(CharTryFromError(())) |
| 215 | + } else { |
| 216 | + Ok(unsafe { from_u32_unchecked(i) }) |
| 217 | + } |
| 218 | + } |
| 219 | +} |
| 220 | + |
| 221 | +/// The error type returned when a conversion from u32 to char fails. |
| 222 | +#[stable(feature = "try_from", since = "1.26.0")] |
| 223 | +#[derive(Copy, Clone, Debug, PartialEq, Eq)] |
| 224 | +pub struct CharTryFromError(()); |
| 225 | + |
| 226 | +#[stable(feature = "try_from", since = "1.26.0")] |
| 227 | +impl fmt::Display for CharTryFromError { |
| 228 | + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 229 | + "converted integer out of range for `char`".fmt(f) |
| 230 | + } |
| 231 | +} |
| 232 | + |
| 233 | +/// Converts a digit in the given radix to a `char`. |
| 234 | +/// |
| 235 | +/// A 'radix' here is sometimes also called a 'base'. A radix of two |
| 236 | +/// indicates a binary number, a radix of ten, decimal, and a radix of |
| 237 | +/// sixteen, hexadecimal, to give some common values. Arbitrary |
| 238 | +/// radices are supported. |
| 239 | +/// |
| 240 | +/// `from_digit()` will return `None` if the input is not a digit in |
| 241 | +/// the given radix. |
| 242 | +/// |
| 243 | +/// # Panics |
| 244 | +/// |
| 245 | +/// Panics if given a radix larger than 36. |
| 246 | +/// |
| 247 | +/// # Examples |
| 248 | +/// |
| 249 | +/// Basic usage: |
| 250 | +/// |
| 251 | +/// ``` |
| 252 | +/// use std::char; |
| 253 | +/// |
| 254 | +/// let c = char::from_digit(4, 10); |
| 255 | +/// |
| 256 | +/// assert_eq!(Some('4'), c); |
| 257 | +/// |
| 258 | +/// // Decimal 11 is a single digit in base 16 |
| 259 | +/// let c = char::from_digit(11, 16); |
| 260 | +/// |
| 261 | +/// assert_eq!(Some('b'), c); |
| 262 | +/// ``` |
| 263 | +/// |
| 264 | +/// Returning `None` when the input is not a digit: |
| 265 | +/// |
| 266 | +/// ``` |
| 267 | +/// use std::char; |
| 268 | +/// |
| 269 | +/// let c = char::from_digit(20, 10); |
| 270 | +/// |
| 271 | +/// assert_eq!(None, c); |
| 272 | +/// ``` |
| 273 | +/// |
| 274 | +/// Passing a large radix, causing a panic: |
| 275 | +/// |
| 276 | +/// ``` |
| 277 | +/// use std::thread; |
| 278 | +/// use std::char; |
| 279 | +/// |
| 280 | +/// let result = thread::spawn(|| { |
| 281 | +/// // this panics |
| 282 | +/// let c = char::from_digit(1, 37); |
| 283 | +/// }).join(); |
| 284 | +/// |
| 285 | +/// assert!(result.is_err()); |
| 286 | +/// ``` |
| 287 | +#[inline] |
| 288 | +#[stable(feature = "rust1", since = "1.0.0")] |
| 289 | +pub fn from_digit(num: u32, radix: u32) -> Option<char> { |
| 290 | + if radix > 36 { |
| 291 | + panic!("from_digit: radix is too high (maximum 36)"); |
| 292 | + } |
| 293 | + if num < radix { |
| 294 | + let num = num as u8; |
| 295 | + if num < 10 { |
| 296 | + Some((b'0' + num) as char) |
| 297 | + } else { |
| 298 | + Some((b'a' + num - 10) as char) |
| 299 | + } |
| 300 | + } else { |
| 301 | + None |
| 302 | + } |
| 303 | +} |
| 304 | + |
0 commit comments