Skip to content

unicode.py refactor part 1 #50922

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 7 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,13 @@ __pycache__/
/obj/
/rt/
/rustllvm/
/src/libstd_unicode/DerivedCoreProperties.txt
/src/libstd_unicode/DerivedNormalizationProps.txt
/src/libstd_unicode/PropList.txt
/src/libstd_unicode/ReadMe.txt
/src/libstd_unicode/Scripts.txt
/src/libstd_unicode/SpecialCasing.txt
/src/libstd_unicode/UnicodeData.txt
/src/libcore/unicode/DerivedCoreProperties.txt
/src/libcore/unicode/DerivedNormalizationProps.txt
/src/libcore/unicode/PropList.txt
/src/libcore/unicode/ReadMe.txt
/src/libcore/unicode/Scripts.txt
/src/libcore/unicode/SpecialCasing.txt
/src/libcore/unicode/UnicodeData.txt
/stage[0-9]+/
/target
target/
Expand All @@ -105,4 +105,3 @@ version.texi
/src/target/

no_llvm_build

4 changes: 2 additions & 2 deletions src/libcore/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,7 @@ impl char {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn to_lowercase(self) -> ToLowercase {
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
ToLowercase(conversions::Lowercase.lookup(self))
}

/// Returns an iterator that yields the uppercase equivalent of a `char`
Expand Down Expand Up @@ -865,7 +865,7 @@ impl char {
#[stable(feature = "rust1", since = "1.0.0")]
#[inline]
pub fn to_uppercase(self) -> ToUppercase {
ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
ToUppercase(conversions::Uppercase.lookup(self))
}

/// Checks if the value is within the ASCII range.
Expand Down
96 changes: 24 additions & 72 deletions src/libcore/char/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence};

use fmt::{self, Write};
use iter::FusedIterator;
use unicode::mapping_table::Lookup;

// UTF-8 ranges and tags for encoding characters
const TAG_CONT: u8 = 0b1000_0000;
Expand Down Expand Up @@ -396,19 +397,33 @@ impl fmt::Display for EscapeDebug {
/// [`char`]: ../../std/primitive.char.html
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct ToLowercase(CaseMappingIter);
pub struct ToLowercase(Lookup);

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for ToLowercase {
// Every method here simply forwards to the iterator wrapped in field `0`,
// so this type adds no iteration logic of its own.
type Item = char;

#[inline]
fn next(&mut self) -> Option<char> {
// Hand out whatever the wrapped iterator yields next.
self.0.next()
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
// Report the wrapped iterator's bounds unchanged.
self.0.size_hint()
}
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToLowercase {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToLowercase {
// Formatting is delegated to the `Display` impl of the wrapped value in
// field `0`; the formatter is passed through untouched.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}

/// Returns an iterator that yields the uppercase equivalent of a `char`.
///
/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
Expand All @@ -418,88 +433,25 @@ impl FusedIterator for ToLowercase {}
/// [`char`]: ../../std/primitive.char.html
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Debug, Clone)]
pub struct ToUppercase(CaseMappingIter);
pub struct ToUppercase(Lookup);

#[stable(feature = "rust1", since = "1.0.0")]
impl Iterator for ToUppercase {
type Item = char;
fn next(&mut self) -> Option<char> {
self.0.next()
}
}

#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToUppercase {}

#[derive(Debug, Clone)]
enum CaseMappingIter {
Three(char, char, char),
Two(char, char),
One(char),
Zero,
}

impl CaseMappingIter {
fn new(chars: [char; 3]) -> CaseMappingIter {
if chars[2] == '\0' {
if chars[1] == '\0' {
CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
} else {
CaseMappingIter::Two(chars[0], chars[1])
}
} else {
CaseMappingIter::Three(chars[0], chars[1], chars[2])
}
}
}

impl Iterator for CaseMappingIter {
type Item = char;
#[inline]
fn next(&mut self) -> Option<char> {
match *self {
CaseMappingIter::Three(a, b, c) => {
*self = CaseMappingIter::Two(b, c);
Some(a)
}
CaseMappingIter::Two(b, c) => {
*self = CaseMappingIter::One(c);
Some(b)
}
CaseMappingIter::One(c) => {
*self = CaseMappingIter::Zero;
Some(c)
}
CaseMappingIter::Zero => None,
}
self.0.next()
}
}

impl fmt::Display for CaseMappingIter {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
CaseMappingIter::Three(a, b, c) => {
f.write_char(a)?;
f.write_char(b)?;
f.write_char(c)
}
CaseMappingIter::Two(b, c) => {
f.write_char(b)?;
f.write_char(c)
}
CaseMappingIter::One(c) => {
f.write_char(c)
}
CaseMappingIter::Zero => Ok(()),
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.0.size_hint()
}
}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToLowercase {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}
#[stable(feature = "fused", since = "1.26.0")]
impl FusedIterator for ToUppercase {}

#[stable(feature = "char_struct_display", since = "1.16.0")]
impl fmt::Display for ToUppercase {
Expand Down
83 changes: 83 additions & 0 deletions src/libcore/unicode/mapping_table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use fmt;
use iter::Cloned;
use slice::Iter;

/// A static table mapping a character to a short replacement sequence.
///
/// At the moment this is only used for `to_lowercase` and `to_uppercase`.
///
/// Each entry pairs a key character with up to three replacement
/// characters. `lookup` truncates the replacement at the first `'\0'`, so
/// unused trailing slots are expected to hold `'\0'`. Because `lookup`
/// performs a binary search on the key, the entries must be sorted by key.
pub struct MappingTable {
    pub(crate) table: &'static [(char, [char; 3])],
}

impl MappingTable {
    /// Returns an iterator over the characters that `c` maps to.
    ///
    /// If `c` has no entry in the table the iterator yields `c` itself,
    /// exactly once. Otherwise it yields the entry's replacement
    /// characters in order, stopping before the first `'\0'` (or after all
    /// three characters when there is no `'\0'`).
    pub fn lookup(&self, c: char) -> Lookup {
        let inner = match self.table.binary_search_by_key(&c, |&(key, _)| key) {
            // No entry: the character maps to itself.
            Err(_) => LookupInner::Same(c),
            Ok(index) => {
                let mapped = &self.table[index].1;
                // Length of the replacement up to (not including) the
                // first `'\0'`; the whole array when none is present.
                let len = mapped
                    .iter()
                    .position(|&m| m == '\0')
                    .unwrap_or(mapped.len());
                LookupInner::Iter(mapped[..len].iter().cloned())
            }
        };
        Lookup(inner)
    }
}

/// Internal state of a [`Lookup`] iterator.
#[derive(Clone)]
pub enum LookupInner {
    /// A single character that has not been yielded yet.
    Same(char),
    /// The characters of a static sequence that are still to be yielded.
    Iter(Cloned<Iter<'static, char>>),
}

/// Iterator over the characters in a mapping.
#[derive(Clone)]
pub struct Lookup(LookupInner);

impl Iterator for Lookup {
    type Item = char;

    /// Yields the next character of the mapping, or `None` once exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        match self.0 {
            LookupInner::Iter(ref mut rest) => rest.next(),
            LookupInner::Same(single) => {
                // The lone character is handed out exactly once: switch to
                // an empty sequence so every later call returns `None`.
                self.0 = LookupInner::Iter([].iter().cloned());
                Some(single)
            }
        }
    }

    /// Reports the exact number of characters that remain.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self.0 {
            LookupInner::Iter(ref rest) => rest.size_hint(),
            LookupInner::Same(_) => (1, Some(1)),
        }
    }
}

impl fmt::Debug for Lookup {
    /// Formats the remaining characters as a list, e.g. `['S', 's']`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Work on a clone so that formatting never advances `self`.
        let mut list = f.debug_list();
        for c in self.clone() {
            list.entry(&c);
        }
        list.finish()
    }
}

impl fmt::Display for Lookup {
    /// Writes the remaining characters one after another, with no separator.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Work on a clone so that formatting never advances `self`; stop at
        // the first write error.
        self.clone()
            .try_for_each(|c| fmt::Write::write_char(&mut *f, c))
    }
}
1 change: 1 addition & 0 deletions src/libcore/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#![allow(missing_docs)]

mod bool_trie;
pub(crate) mod mapping_table;
pub(crate) mod printable;
pub(crate) mod tables;
pub(crate) mod version;
Expand Down
Loading