Skip to content

Implement ASCII case-related method on Vec<u8> and &[u8] #15777

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/liblog/directive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::ascii::StrAsciiExt;
use std::ascii::AsciiExt;
use std::cmp;

#[deriving(Show, Clone)]
Expand Down
2 changes: 1 addition & 1 deletion src/librustc/lint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#![macro_escape]

use std::hash;
use std::ascii::StrAsciiExt;
use std::ascii::AsciiExt;
use syntax::codemap::Span;
use syntax::visit::FnKind;
use syntax::ast;
Expand Down
153 changes: 95 additions & 58 deletions src/libstd/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,19 @@ use iter::Iterator;
use mem;
use option::{Option, Some, None};
use slice::{ImmutableVector, MutableVector, Vector};
use str::{Str, StrAllocating, StrSlice};
use str::{Str, StrSlice};
use str;
use string::String;
use to_string::IntoStr;
use vec::Vec;

#[deprecated="this trait has been renamed to `AsciiExt`"]
pub use StrAsciiExt = self::AsciiExt;

#[deprecated="this trait has been renamed to `OwnedAsciiExt`"]
pub use OwnedStrAsciiExt = self::OwnedAsciiExt;


/// Datatype to hold one ascii character. It wraps a `u8`, with the highest bit always zero.
#[deriving(Clone, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub struct Ascii { chr: u8 }
Expand Down Expand Up @@ -366,108 +374,133 @@ impl IntoBytes for Vec<Ascii> {
}
}


/// Extension methods for ASCII-subset only operations on owned strings
pub trait OwnedStrAsciiExt {
pub trait OwnedAsciiExt {
/// Convert the string to ASCII upper case:
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
/// but non-ASCII letters are unchanged.
fn into_ascii_upper(self) -> String;
fn into_ascii_upper(self) -> Self;

/// Convert the string to ASCII lower case:
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
/// but non-ASCII letters are unchanged.
fn into_ascii_lower(self) -> String;
fn into_ascii_lower(self) -> Self;
}

/// Extension methods for ASCII-subset only operations on string slices
pub trait StrAsciiExt {
pub trait AsciiExt<T> {
/// Makes a copy of the string in ASCII upper case:
/// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
/// but non-ASCII letters are unchanged.
fn to_ascii_upper(&self) -> String;
fn to_ascii_upper(&self) -> T;

/// Makes a copy of the string in ASCII lower case:
/// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
/// but non-ASCII letters are unchanged.
fn to_ascii_lower(&self) -> String;
fn to_ascii_lower(&self) -> T;

/// Check that two strings are an ASCII case-insensitive match.
/// Same as `to_ascii_lower(a) == to_ascii_lower(b)`,
/// but without allocating and copying temporary strings.
fn eq_ignore_ascii_case(&self, other: &str) -> bool;
fn eq_ignore_ascii_case(&self, other: Self) -> bool;
}

impl<'a> StrAsciiExt for &'a str {
impl<'a> AsciiExt<String> for &'a str {
#[inline]
fn to_ascii_upper(&self) -> String {
unsafe { str_copy_map_bytes(*self, ASCII_UPPER_MAP) }
// Vec<u8>::to_ascii_upper() preserves the UTF-8 invariant.
unsafe { str::raw::from_utf8_owned(self.as_bytes().to_ascii_upper()) }
}

#[inline]
fn to_ascii_lower(&self) -> String {
unsafe { str_copy_map_bytes(*self, ASCII_LOWER_MAP) }
// Vec<u8>::to_ascii_lower() preserves the UTF-8 invariant.
unsafe { str::raw::from_utf8_owned(self.as_bytes().to_ascii_lower()) }
}

#[inline]
fn eq_ignore_ascii_case(&self, other: &str) -> bool {
self.len() == other.len() &&
self.as_bytes().iter().zip(other.as_bytes().iter()).all(
|(byte_self, byte_other)| {
ASCII_LOWER_MAP[*byte_self as uint] ==
ASCII_LOWER_MAP[*byte_other as uint]
})
self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
}
}

impl OwnedStrAsciiExt for String {
impl OwnedAsciiExt for String {
#[inline]
fn into_ascii_upper(self) -> String {
unsafe { str_map_bytes(self, ASCII_UPPER_MAP) }
// Vec<u8>::into_ascii_upper() preserves the UTF-8 invariant.
unsafe { str::raw::from_utf8_owned(self.into_bytes().into_ascii_upper()) }
}

#[inline]
fn into_ascii_lower(self) -> String {
unsafe { str_map_bytes(self, ASCII_LOWER_MAP) }
// Vec<u8>::into_ascii_lower() preserves the UTF-8 invariant.
unsafe { str::raw::from_utf8_owned(self.into_bytes().into_ascii_lower()) }
}
}

#[inline]
unsafe fn str_map_bytes(string: String, map: &'static [u8]) -> String {
let mut bytes = string.into_bytes();
impl<'a> AsciiExt<Vec<u8>> for &'a [u8] {
#[inline]
fn to_ascii_upper(&self) -> Vec<u8> {
self.iter().map(|&byte| ASCII_UPPER_MAP[byte as uint]).collect()
}

for b in bytes.mut_iter() {
*b = map[*b as uint];
#[inline]
fn to_ascii_lower(&self) -> Vec<u8> {
self.iter().map(|&byte| ASCII_LOWER_MAP[byte as uint]).collect()
}

String::from_utf8(bytes).unwrap()
#[inline]
fn eq_ignore_ascii_case(&self, other: &[u8]) -> bool {
self.len() == other.len() &&
self.iter().zip(other.iter()).all(
|(byte_self, byte_other)| {
ASCII_LOWER_MAP[*byte_self as uint] ==
ASCII_LOWER_MAP[*byte_other as uint]
})
}
}

#[inline]
unsafe fn str_copy_map_bytes(string: &str, map: &'static [u8]) -> String {
let mut s = String::from_str(string);
for b in s.as_mut_bytes().mut_iter() {
*b = map[*b as uint];
impl OwnedAsciiExt for Vec<u8> {
#[inline]
fn into_ascii_upper(mut self) -> Vec<u8> {
for byte in self.mut_iter() {
*byte = ASCII_UPPER_MAP[*byte as uint];
}
self
}

#[inline]
fn into_ascii_lower(mut self) -> Vec<u8> {
for byte in self.mut_iter() {
*byte = ASCII_LOWER_MAP[*byte as uint];
}
self
}
s.into_string()
}

static ASCII_LOWER_MAP: &'static [u8] = &[

pub static ASCII_LOWER_MAP: [u8, ..256] = [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
b'@',

b'a', b'b', b'c', b'd', b'e', b'f', b'g',
b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
b'x', b'y', b'z',

b'[', b'\\', b']', b'^', b'_',
b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g',
b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
b'x', b'y', b'z', b'{', b'|', b'}', b'~', 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
Expand All @@ -486,23 +519,27 @@ static ASCII_LOWER_MAP: &'static [u8] = &[
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
];

static ASCII_UPPER_MAP: &'static [u8] = &[
pub static ASCII_UPPER_MAP: [u8, ..256] = [
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
b' ', b'!', b'"', b'#', b'$', b'%', b'&', b'\'',
b'(', b')', b'*', b'+', b',', b'-', b'.', b'/',
b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7',
b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G',
b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
b'X', b'Y', b'Z', b'[', b'\\', b']', b'^', b'_',
b'`',

b'A', b'B', b'C', b'D', b'E', b'F', b'G',
b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
b'X', b'Y', b'Z',

b'{', b'|', b'}', b'~', 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
Expand Down
2 changes: 1 addition & 1 deletion src/test/run-pass/issue-10683.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::ascii::StrAsciiExt;
use std::ascii::AsciiExt;

static NAME: &'static str = "hello world";

Expand Down