Skip to content

Commit 8eee38c

Browse files
author
Clar Charr
committed
Move conversion logic from unicode.py to mapping_table.rs
1 parent 5b6c88b commit 8eee38c

File tree

6 files changed

+1430
-1331
lines changed

6 files changed

+1430
-1331
lines changed

src/libcore/char/methods.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ impl char {
779779
#[stable(feature = "rust1", since = "1.0.0")]
780780
#[inline]
781781
pub fn to_lowercase(self) -> ToLowercase {
782-
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
782+
ToLowercase(conversions::Lowercase.lookup(self))
783783
}
784784

785785
/// Returns an iterator that yields the uppercase equivalent of a `char`
@@ -865,7 +865,7 @@ impl char {
865865
#[stable(feature = "rust1", since = "1.0.0")]
866866
#[inline]
867867
pub fn to_uppercase(self) -> ToUppercase {
868-
ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
868+
ToUppercase(conversions::Uppercase.lookup(self))
869869
}
870870

871871
/// Checks if the value is within the ASCII range.

src/libcore/char/mod.rs

+24-72
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ pub use self::decode::{decode_utf8, DecodeUtf8, InvalidSequence};
5858

5959
use fmt::{self, Write};
6060
use iter::FusedIterator;
61+
use unicode::mapping_table::Lookup;
6162

6263
// UTF-8 ranges and tags for encoding characters
6364
const TAG_CONT: u8 = 0b1000_0000;
@@ -396,19 +397,33 @@ impl fmt::Display for EscapeDebug {
396397
/// [`char`]: ../../std/primitive.char.html
397398
#[stable(feature = "rust1", since = "1.0.0")]
398399
#[derive(Debug, Clone)]
399-
pub struct ToLowercase(CaseMappingIter);
400+
pub struct ToLowercase(Lookup);
400401

401402
#[stable(feature = "rust1", since = "1.0.0")]
402403
impl Iterator for ToLowercase {
403404
type Item = char;
405+
406+
#[inline]
404407
fn next(&mut self) -> Option<char> {
405408
self.0.next()
406409
}
410+
411+
#[inline]
412+
fn size_hint(&self) -> (usize, Option<usize>) {
413+
self.0.size_hint()
414+
}
407415
}
408416

409417
#[stable(feature = "fused", since = "1.26.0")]
410418
impl FusedIterator for ToLowercase {}
411419

420+
#[stable(feature = "char_struct_display", since = "1.16.0")]
421+
impl fmt::Display for ToLowercase {
422+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
423+
fmt::Display::fmt(&self.0, f)
424+
}
425+
}
426+
412427
/// Returns an iterator that yields the uppercase equivalent of a `char`.
413428
///
414429
/// This `struct` is created by the [`to_uppercase`] method on [`char`]. See
@@ -418,88 +433,25 @@ impl FusedIterator for ToLowercase {}
418433
/// [`char`]: ../../std/primitive.char.html
419434
#[stable(feature = "rust1", since = "1.0.0")]
420435
#[derive(Debug, Clone)]
421-
pub struct ToUppercase(CaseMappingIter);
436+
pub struct ToUppercase(Lookup);
422437

423438
#[stable(feature = "rust1", since = "1.0.0")]
424439
impl Iterator for ToUppercase {
425440
type Item = char;
426-
fn next(&mut self) -> Option<char> {
427-
self.0.next()
428-
}
429-
}
430-
431-
#[stable(feature = "fused", since = "1.26.0")]
432-
impl FusedIterator for ToUppercase {}
433-
434-
#[derive(Debug, Clone)]
435-
enum CaseMappingIter {
436-
Three(char, char, char),
437-
Two(char, char),
438-
One(char),
439-
Zero,
440-
}
441-
442-
impl CaseMappingIter {
443-
fn new(chars: [char; 3]) -> CaseMappingIter {
444-
if chars[2] == '\0' {
445-
if chars[1] == '\0' {
446-
CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
447-
} else {
448-
CaseMappingIter::Two(chars[0], chars[1])
449-
}
450-
} else {
451-
CaseMappingIter::Three(chars[0], chars[1], chars[2])
452-
}
453-
}
454-
}
455441

456-
impl Iterator for CaseMappingIter {
457-
type Item = char;
442+
#[inline]
458443
fn next(&mut self) -> Option<char> {
459-
match *self {
460-
CaseMappingIter::Three(a, b, c) => {
461-
*self = CaseMappingIter::Two(b, c);
462-
Some(a)
463-
}
464-
CaseMappingIter::Two(b, c) => {
465-
*self = CaseMappingIter::One(c);
466-
Some(b)
467-
}
468-
CaseMappingIter::One(c) => {
469-
*self = CaseMappingIter::Zero;
470-
Some(c)
471-
}
472-
CaseMappingIter::Zero => None,
473-
}
444+
self.0.next()
474445
}
475-
}
476446

477-
impl fmt::Display for CaseMappingIter {
478-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
479-
match *self {
480-
CaseMappingIter::Three(a, b, c) => {
481-
f.write_char(a)?;
482-
f.write_char(b)?;
483-
f.write_char(c)
484-
}
485-
CaseMappingIter::Two(b, c) => {
486-
f.write_char(b)?;
487-
f.write_char(c)
488-
}
489-
CaseMappingIter::One(c) => {
490-
f.write_char(c)
491-
}
492-
CaseMappingIter::Zero => Ok(()),
493-
}
447+
#[inline]
448+
fn size_hint(&self) -> (usize, Option<usize>) {
449+
self.0.size_hint()
494450
}
495451
}
496452

497-
#[stable(feature = "char_struct_display", since = "1.16.0")]
498-
impl fmt::Display for ToLowercase {
499-
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
500-
fmt::Display::fmt(&self.0, f)
501-
}
502-
}
453+
#[stable(feature = "fused", since = "1.26.0")]
454+
impl FusedIterator for ToUppercase {}
503455

504456
#[stable(feature = "char_struct_display", since = "1.16.0")]
505457
impl fmt::Display for ToUppercase {

src/libcore/unicode/mapping_table.rs

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
use fmt;
11+
use iter::Cloned;
12+
use slice::Iter;
13+
14+
/// A static table mapping a character to a short string of characters.
///
/// At the moment this is only used for `to_lowercase` and `to_uppercase`.
///
/// Each entry pairs a key with up to three replacement characters; a
/// replacement shorter than three characters is padded at the end with
/// `'\0'`. `lookup` binary-searches on the key, so the entries must be sorted
/// by key in ascending order.
pub struct MappingTable {
    pub(crate) table: &'static [(char, [char; 3])],
}

impl MappingTable {
    /// Returns an iterator over the characters that `c` maps to.
    ///
    /// A character without an entry in the table maps to itself. Otherwise the
    /// iterator yields the entry's characters up to, but not including, the
    /// first `'\0'` padding character.
    pub fn lookup(&self, c: char) -> Lookup {
        let inner = match self.table.binary_search_by_key(&c, |&(key, _)| key) {
            Ok(index) => {
                let mapping: &'static [char; 3] = &self.table[index].1;
                // Without any padding the whole entry is significant.
                let len = mapping.iter().position(|&m| m == '\0').unwrap_or(mapping.len());
                LookupInner::Iter(mapping[..len].iter().cloned())
            }
            Err(_) => LookupInner::Same(c),
        };
        Lookup(inner)
    }
}

/// The state of a `Lookup` iterator.
#[derive(Clone)]
pub enum LookupInner {
    /// The character maps to itself and has not been yielded yet.
    Same(char),
    /// The characters of a table entry that have not been yielded yet.
    Iter(Cloned<Iter<'static, char>>),
}

/// Iterator over the characters in a mapping.
#[derive(Clone)]
pub struct Lookup(LookupInner);

impl Iterator for Lookup {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        let same = match &mut self.0 {
            LookupInner::Iter(iter) => return iter.next(),
            LookupInner::Same(c) => *c,
        };
        // The identity mapping yields its character exactly once: switch to an
        // empty slice iterator so that every later call returns `None`.
        let exhausted: &'static [char] = &[];
        self.0 = LookupInner::Iter(exhausted.iter().cloned());
        Some(same)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match &self.0 {
            // Exactly one character is still pending.
            LookupInner::Same(_) => (1, Some(1)),
            LookupInner::Iter(iter) => iter.size_hint(),
        }
    }
}

/// Formats the characters that are still to be yielded as a list.
impl fmt::Debug for Lookup {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Iterate over a clone so that formatting does not advance `self`.
        f.debug_list().entries(self.clone()).finish()
    }
}

/// Writes the characters that are still to be yielded, in order.
impl fmt::Display for Lookup {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Iterate over a clone so that formatting does not advance `self`.
        for c in self.clone() {
            fmt::Write::write_char(f, c)?;
        }
        Ok(())
    }
}

src/libcore/unicode/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#![allow(missing_docs)]
1313

1414
mod bool_trie;
15+
pub(crate) mod mapping_table;
1516
pub(crate) mod printable;
1617
pub(crate) mod tables;
1718
pub(crate) mod version;

0 commit comments

Comments
 (0)