Skip to content

Improve docs for std::char #30030

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 25, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 142 additions & 16 deletions src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,63 @@ const MAX_THREE_B: u32 = 0x10000;
Cn Unassigned a reserved unassigned code point or a noncharacter
*/

/// The highest valid code point
/// The highest valid code point a `char` can have.
///
/// A [`char`] is a [Unicode Scalar Value], which means that it is a [Code
/// Point], but only ones within a certain range. `MAX` is the highest valid
/// code point that's a valid [Unicode Scalar Value].
///
/// [`char`]: primitive.char.html
/// [Unicode Scalar Value]: http://www.unicode.org/glossary/#unicode_scalar_value
/// [Code Point]: http://www.unicode.org/glossary/#code_point
#[stable(feature = "rust1", since = "1.0.0")]
pub const MAX: char = '\u{10ffff}';

/// Converts a `u32` to an `Option<char>`.
/// Converts a `u32` to a `char`.
///
/// Note that all [`char`]s are valid [`u32`]s, and can be casted to one with
/// [`as`]:
///
/// ```
/// let c = '💯';
/// let i = c as u32;
///
/// assert_eq!(128175, i);
/// ```
///
/// However, the reverse is not true: not all valid [`u32`]s are valid
/// [`char`]s. `from_u32()` will return `None` if the input is not a valid value
/// for a [`char`].
///
/// [`char`]: primitive.char.html
/// [`u32`]: primitive.u32.html
/// [`as`]: ../book/casting-between-types.html#as
///
/// For an unsafe version of this function which ignores these checks, see
/// [`from_u32_unchecked()`].
///
/// [`from_u32_unchecked()`]: fn.from_u32_unchecked.html
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::char;
///
/// assert_eq!(char::from_u32(0x2764), Some('❤'));
/// assert_eq!(char::from_u32(0x110000), None); // invalid character
/// let c = char::from_u32(0x2764);
///
/// assert_eq!(Some('❤'), c);
/// ```
///
/// Returning `None` when the input is not a valid [`char`]:
///
/// ```
/// use std::char;
///
/// let c = char::from_u32(0x110000);
///
/// assert_eq!(None, c);
/// ```
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -88,33 +132,104 @@ pub fn from_u32(i: u32) -> Option<char> {
}
}

/// Converts a `u32` to an `char`, not checking whether it is a valid unicode
/// codepoint.
/// Converts a `u32` to a `char`, ignoring validity.
///
/// Note that all [`char`]s are valid [`u32`]s, and can be casted to one with
/// [`as`]:
///
/// ```
/// let c = '💯';
/// let i = c as u32;
///
/// assert_eq!(128175, i);
/// ```
///
/// However, the reverse is not true: not all valid [`u32`]s are valid
/// [`char`]s. `from_u32_unchecked()` will ignore this, and blindly cast to
/// [`char`], possibly creating an invalid one.
///
/// [`char`]: primitive.char.html
/// [`u32`]: primitive.u32.html
/// [`as`]: ../book/casting-between-types.html#as
///
/// # Safety
///
/// This function is unsafe, as it may construct invalid `char` values.
///
/// For a safe version of this function, see the [`from_u32()`] function.
///
/// [`from_u32()`]: fn.from_u32.html
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::char;
///
/// let c = unsafe { char::from_u32_unchecked(0x2764) };
///
/// assert_eq!('❤', c);
/// ```
#[inline]
#[stable(feature = "char_from_unchecked", since = "1.5.0")]
pub unsafe fn from_u32_unchecked(i: u32) -> char {
transmute(i)
}

/// Converts a number to the character representing it.
/// Converts a digit in the given radix to a `char`.
///
/// # Return value
/// A 'radix' here is sometimes also called a 'base'. A radix of two
/// indicates a binary number, a radix of ten, decimal, and a radix of
/// sixteen, hexicdecimal, to give some common values. Arbitrary
/// radicum are supported.
///
/// Returns `Some(char)` if `num` represents one digit under `radix`,
/// using one character of `0-9` or `a-z`, or `None` if it doesn't.
/// `from_digit()` will return `None` if the input is not a digit in
/// the given radix.
///
/// # Panics
///
/// Panics if given an `radix` > 36.
/// Panics if given a radix larger than 36.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::char;
///
/// let c = char::from_digit(4, 10);
///
/// assert_eq!(c, Some('4'));
/// assert_eq!(Some('4'), c);
///
/// // Decimal 11 is a single digit in base 16
/// let c = char::from_digit(11, 16);
///
/// assert_eq!(Some('b'), c);
/// ```
///
/// Returning `None` when the input is not a digit:
///
/// ```
/// use std::char;
///
/// let c = char::from_digit(20, 10);
///
/// assert_eq!(None, c);
/// ```
///
/// Passing a large radix, causing a panic:
///
/// ```
/// use std::thread;
/// use std::char;
///
/// let result = thread::spawn(|| {
/// // this panics
/// let c = char::from_digit(1, 37);
/// }).join();
///
/// assert!(result.is_err());
/// ```
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down Expand Up @@ -287,8 +402,14 @@ pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option<usize> {
}
}

/// An iterator over the characters that represent a `char`, as escaped by
/// Rust's unicode escaping rules.
/// Returns an iterator that yields the hexadecimal Unicode escape of a
/// character, as `char`s.
///
/// This `struct` is created by the [`escape_unicode()`] method on [`char`]. See
/// its documentation for more.
///
/// [`escape_unicode()`]: primitive.char.html#method.escape_unicode
/// [`char`]: primitive.char.html
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeUnicode {
Expand Down Expand Up @@ -362,8 +483,13 @@ impl Iterator for EscapeUnicode {
}
}

/// An iterator over the characters that represent a `char`, escaped
/// for maximum portability.
/// An iterator that yields the literal escape code of a `char`.
///
/// This `struct` is created by the [`escape_default()`] method on [`char`]. See
/// its documentation for more.
///
/// [`escape_default()`]: primitive.char.html#method.escape_default
/// [`char`]: primitive.char.html
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeDefault {
Expand Down
47 changes: 27 additions & 20 deletions src/librustc_unicode/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,23 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Unicode scalar values
//! A character type.
//!
//! This module provides the `CharExt` trait, as well as its
//! implementation for the primitive `char` type, in order to allow
//! basic character manipulation.
//! The `char` type represents a single character. More specifically, since
//! 'character' isn't a well-defined concept in Unicode, `char` is a '[Unicode
//! scalar value]', which is similar to, but not the same as, a '[Unicode code
//! point]'.
//!
//! A `char` represents a
//! *[Unicode scalar
//! value](http://www.unicode.org/glossary/#unicode_scalar_value)*, as it can
//! contain any Unicode code point except high-surrogate and low-surrogate code
//! points.
//! [Unicode scalar value]: http://www.unicode.org/glossary/#unicode_scalar_value
//! [Unicode code point]: http://www.unicode.org/glossary/#code_point
//!
//! As such, only values in the ranges \[0x0,0xD7FF\] and \[0xE000,0x10FFFF\]
//! (inclusive) are allowed. A `char` can always be safely cast to a `u32`;
//! however the converse is not always true due to the above range limits
//! and, as such, should be performed via the `from_u32` function.
//! This module exists for technical reasons, the primary documentation for
//! `char` is directly on [the `char` primitive type](../primitive.char.html)
//! itself.
//!
//! *[See also the `char` primitive type](../primitive.char.html).*
//! This module is the home of the iterator implementations for the iterators
//! implemented on `char`, as well as some useful constants and conversion
//! functions that convert various types to `char`.

#![stable(feature = "rust1", since = "1.0.0")]

Expand All @@ -42,9 +41,13 @@ pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicod
#[unstable(feature = "unicode", issue = "27783")]
pub use tables::UNICODE_VERSION;

/// An iterator over the lowercase mapping of a given character, returned from
/// the [`to_lowercase` method](../primitive.char.html#method.to_lowercase) on
/// characters.
/// Returns an iterator that yields the lowercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_lowercase()`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_lowercase()`]: primitive.char.html#method.escape_to_lowercase
/// [`char`]: primitive.char.html
#[stable(feature = "rust1", since = "1.0.0")]
pub struct ToLowercase(CaseMappingIter);

Expand All @@ -56,9 +59,13 @@ impl Iterator for ToLowercase {
}
}

/// An iterator over the uppercase mapping of a given character, returned from
/// the [`to_uppercase` method](../primitive.char.html#method.to_uppercase) on
/// characters.
/// Returns an iterator that yields the uppercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_uppercase()`] method on [`char`]. See
/// its documentation for more.
///
/// [`to_uppercase()`]: primitive.char.html#method.escape_to_uppercase
/// [`char`]: primitive.char.html
#[stable(feature = "rust1", since = "1.0.0")]
pub struct ToUppercase(CaseMappingIter);

Expand Down