Skip to content

Commit 05f9586

Browse files
committed
auto merge of #5980 : Kimundi/rust/ascii-encoding, r=thestinger
Added an Ascii type to use for byte inputs that are known to contain only ASCII.
2 parents aba93c6 + bf4f088 commit 05f9586

File tree

6 files changed

+320
-31
lines changed

src/libcore/char.rs

+16-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -234,6 +234,21 @@ pub fn escape_default(c: char) -> ~str {
234234
}
235235
}
236236

237+
/// Returns the number of bytes this character would need if encoded in UTF-8
238+
pub fn len_utf8_bytes(c: char) -> uint {
239+
static max_one_b: uint = 128u;
240+
static max_two_b: uint = 2048u;
241+
static max_three_b: uint = 65536u;
242+
static max_four_b: uint = 2097152u;
243+
244+
let code = c as uint;
245+
if code < max_one_b { 1u }
246+
else if code < max_two_b { 2u }
247+
else if code < max_three_b { 3u }
248+
else if code < max_four_b { 4u }
249+
else { fail!(~"invalid character!") }
250+
}
251+
237252
/**
238253
* Compare two chars
239254
*
@@ -334,7 +349,6 @@ fn test_escape_default() {
334349
assert_eq!(escape_default('\U0001d4b6'), ~"\\U0001d4b6");
335350
}
336351
337-
338352
#[test]
339353
fn test_escape_unicode() {
340354
assert_eq!(escape_unicode('\x00'), ~"\\x00");

src/libcore/core.rc

+3
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ pub mod vec;
164164
pub mod at_vec;
165165
pub mod str;
166166

167+
#[path = "str/ascii.rs"]
168+
pub mod ascii;
169+
167170
pub mod ptr;
168171
pub mod owned;
169172
pub mod managed;

src/libcore/prelude.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,10 @@ pub use path::Path;
4545
pub use path::PosixPath;
4646
pub use path::WindowsPath;
4747
pub use ptr::Ptr;
48+
pub use ascii::{Ascii, AsciiCast, OwnedAsciiCast, AsciiStr};
4849
pub use str::{StrSlice, OwnedStr};
4950
pub use to_bytes::IterBytes;
50-
pub use to_str::ToStr;
51+
pub use to_str::{ToStr, ToStrConsume};
5152
pub use tuple::{CopyableTuple, ImmutableTuple, ExtendedTupleOps};
5253
pub use vec::{CopyableVector, ImmutableVector};
5354
pub use vec::{ImmutableEqVector, ImmutableCopyableVector};

src/libcore/str.rs

+25-28
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -789,16 +789,18 @@ pub fn each_split_within<'a>(ss: &'a str,
789789

790790
/// Convert a string to lowercase. ASCII only
791791
pub fn to_lower(s: &str) -> ~str {
792-
map(s,
793-
|c| unsafe{(libc::tolower(c as libc::c_char)) as char}
794-
)
792+
do map(s) |c| {
793+
assert!(char::is_ascii(c));
794+
(unsafe{libc::tolower(c as libc::c_char)}) as char
795+
}
795796
}
796797

797798
/// Convert a string to uppercase. ASCII only
798799
pub fn to_upper(s: &str) -> ~str {
799-
map(s,
800-
|c| unsafe{(libc::toupper(c as libc::c_char)) as char}
801-
)
800+
do map(s) |c| {
801+
assert!(char::is_ascii(c));
802+
(unsafe{libc::toupper(c as libc::c_char)}) as char
803+
}
802804
}
803805

804806
/**
@@ -2317,20 +2319,20 @@ pub mod raw {
23172319
}
23182320

23192321
/// Removes the last byte from a string and returns it. (Not UTF-8 safe).
2320-
pub fn pop_byte(s: &mut ~str) -> u8 {
2322+
pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
23212323
let len = len(*s);
23222324
assert!((len > 0u));
23232325
let b = s[len - 1u];
2324-
unsafe { set_len(s, len - 1u) };
2326+
set_len(s, len - 1u);
23252327
return b;
23262328
}
23272329

23282330
/// Removes the first byte from a string and returns it. (Not UTF-8 safe).
2329-
pub fn shift_byte(s: &mut ~str) -> u8 {
2331+
pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
23302332
let len = len(*s);
23312333
assert!((len > 0u));
23322334
let b = s[0];
2333-
*s = unsafe { raw::slice_bytes_owned(*s, 1u, len) };
2335+
*s = raw::slice_bytes_owned(*s, 1u, len);
23342336
return b;
23352337
}
23362338

@@ -3096,12 +3098,11 @@ mod tests {
30963098
30973099
#[test]
30983100
fn test_to_lower() {
3099-
unsafe {
3100-
assert!(~"" == map(~"",
3101-
|c| libc::tolower(c as c_char) as char));
3102-
assert!(~"ymca" == map(~"YMCA",
3103-
|c| libc::tolower(c as c_char) as char));
3104-
}
3101+
// libc::tolower, and hence str::to_lower
3102+
// are culturally insensitive: they only work for ASCII
3103+
// (see Issue #1347)
3104+
assert!(~"" == to_lower(""));
3105+
assert!(~"ymca" == to_lower("YMCA"));
31053106
}
31063107
31073108
#[test]
@@ -3346,15 +3347,15 @@ mod tests {
33463347
#[test]
33473348
fn test_shift_byte() {
33483349
let mut s = ~"ABC";
3349-
let b = raw::shift_byte(&mut s);
3350+
let b = unsafe{raw::shift_byte(&mut s)};
33503351
assert!((s == ~"BC"));
33513352
assert!((b == 65u8));
33523353
}
33533354
33543355
#[test]
33553356
fn test_pop_byte() {
33563357
let mut s = ~"ABC";
3357-
let b = raw::pop_byte(&mut s);
3358+
let b = unsafe{raw::pop_byte(&mut s)};
33583359
assert!((s == ~"AB"));
33593360
assert!((b == 67u8));
33603361
}
@@ -3666,12 +3667,8 @@ mod tests {
36663667

36673668
#[test]
36683669
fn test_map() {
3669-
unsafe {
3670-
assert!(~"" == map(~"", |c|
3671-
libc::toupper(c as c_char) as char));
3672-
assert!(~"YMCA" == map(~"ymca",
3673-
|c| libc::toupper(c as c_char) as char));
3674-
}
3670+
assert!(~"" == map(~"", |c| unsafe {libc::toupper(c as c_char)} as char));
3671+
assert!(~"YMCA" == map(~"ymca", |c| unsafe {libc::toupper(c as c_char)} as char));
36753672
}
36763673
36773674
#[test]
@@ -3685,11 +3682,11 @@ mod tests {
36853682
36863683
#[test]
36873684
fn test_any() {
3688-
assert!(false == any(~"", char::is_uppercase));
3685+
assert!(false == any(~"", char::is_uppercase));
36893686
assert!(false == any(~"ymca", char::is_uppercase));
36903687
assert!(true == any(~"YMCA", char::is_uppercase));
3691-
assert!(true == any(~"yMCA", char::is_uppercase));
3692-
assert!(true == any(~"Ymcy", char::is_uppercase));
3688+
assert!(true == any(~"yMCA", char::is_uppercase));
3689+
assert!(true == any(~"Ymcy", char::is_uppercase));
36933690
}
36943691
36953692
#[test]

0 commit comments

Comments
 (0)