rust-lang · bors · Apr 23, 2013 · Apr 18, 2013 · Apr 20, 2013 · Apr 20, 2013
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
@@ -1,4 +1,4 @@
-// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -234,6 +234,21 @@ pub fn escape_default(c: char) -> ~str {
     }
 }
 
+/// Returns the number of bytes needed to encode `c` in UTF-8 (fails if `c` >= 2097152)
+pub fn len_utf8_bytes(c: char) -> uint {
+    static max_one_b: uint = 128u; // 2^7: code points below this take 1 byte
+    static max_two_b: uint = 2048u; // 2^11: code points below this take 2 bytes
+    static max_three_b: uint = 65536u; // 2^16: code points below this take 3 bytes
+    static max_four_b: uint = 2097152u; // 2^21: code points below this take 4 bytes
+
+    let code = c as uint;
+    if code < max_one_b { 1u }
+    else if code < max_two_b { 2u }
+    else if code < max_three_b { 3u }
+    else if code < max_four_b { 4u }
+    else { fail!(~"invalid character!") }
+}
+
 /**
  * Compare two chars
  *
@@ -334,7 +349,6 @@ fn test_escape_default() {
     assert_eq!(escape_default('\U0001d4b6'), ~"\\U0001d4b6");
 }
 
-
 #[test]
 fn test_escape_unicode() {
     assert_eq!(escape_unicode('\x00'), ~"\\x00");

diff --git a/src/libcore/prelude.rs b/src/libcore/prelude.rs
@@ -40,9 +40,14 @@ pub use path::Path;
 pub use path::PosixPath;
 pub use path::WindowsPath;
 pub use ptr::Ptr;
+// NOTE: Remove the `#[cfg(stage*)]` markers below after snapshot
+#[cfg(stage1)]
+#[cfg(stage2)]
+#[cfg(stage3)]
+pub use str::{Ascii, AsciiCast, OwnedAsciiCast, ToStrAscii};
 pub use str::{StrSlice, OwnedStr};
 pub use to_bytes::IterBytes;
-pub use to_str::ToStr;
+pub use to_str::{ToStr, ToStrConsume};
 pub use tuple::{CopyableTuple, ImmutableTuple, ExtendedTupleOps};
 pub use vec::{CopyableVector, ImmutableVector};
 pub use vec::{ImmutableEqVector, ImmutableCopyableVector};

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
@@ -1,4 +1,4 @@
-// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -17,6 +17,12 @@
  * some heavy-duty uses, try std::rope.
  */
 
+// NOTE: Remove the `#[cfg(stage*)]` markers below after snapshot
+#[cfg(stage1)]
+#[cfg(stage2)]
+#[cfg(stage3)]
+pub use self::ascii::{Ascii, AsciiCast, OwnedAsciiCast, ToStrAscii};
+
 use at_vec;
 use cast;
 use char;
@@ -34,6 +40,13 @@ use to_str::ToStr;
 
 #[cfg(notest)] use cmp::{Eq, Ord, Equiv, TotalEq};
 
+// NOTE: Remove the `#[cfg(stage*)]` markers below after snapshot
+#[cfg(stage1)]
+#[cfg(stage2)]
+#[cfg(stage3)]
+#[path = "str/ascii.rs"]
+mod ascii;
+
 /*
 Section: Creating a string
 */
@@ -789,16 +802,18 @@ pub fn each_split_within<'a>(ss: &'a str,
 
 /// Convert a string to lowercase. ASCII only
 pub fn to_lower(s: &str) -> ~str {
-    map(s,
-        |c| unsafe{(libc::tolower(c as libc::c_char)) as char}
-    )
+    do map(s) |c| {
+        assert!(char::is_ascii(c)); // fail on non-ASCII chars before the cast to c_char
+        (unsafe{libc::tolower(c as libc::c_char)}) as char
+    }
 }
 
 /// Convert a string to uppercase. ASCII only
 pub fn to_upper(s: &str) -> ~str {
-    map(s,
-        |c| unsafe{(libc::toupper(c as libc::c_char)) as char}
-    )
+    do map(s) |c| {
+        assert!(char::is_ascii(c)); // fail on non-ASCII chars before the cast to c_char
+        (unsafe{libc::toupper(c as libc::c_char)}) as char
+    }
 }
 
 /**
@@ -2317,20 +2332,20 @@ pub mod raw {
     }
 
     /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
-    pub fn pop_byte(s: &mut ~str) -> u8 {
+    pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
         let len = len(*s);
         assert!((len > 0u));
         let b = s[len - 1u];
-        unsafe { set_len(s, len - 1u) };
+        set_len(s, len - 1u); // no inner `unsafe` block: the whole fn is now unsafe
         return b;
     }
 
     /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
-    pub fn shift_byte(s: &mut ~str) -> u8 {
+    pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
         let len = len(*s);
         assert!((len > 0u));
         let b = s[0];
-        *s = unsafe { raw::slice_bytes_owned(*s, 1u, len) };
+        *s = raw::slice_bytes_owned(*s, 1u, len); // no inner `unsafe`: the whole fn is now unsafe
         return b;
     }
 
@@ -3096,12 +3111,11 @@ mod tests {
 
     #[test]
     fn test_to_lower() {
-        unsafe {
-            assert!(~"" == map(~"",
-                |c| libc::tolower(c as c_char) as char));
-            assert!(~"ymca" == map(~"YMCA",
-                |c| libc::tolower(c as c_char) as char));
-        }
+        // libc::tolower, and hence str::to_lower
+        // are culturally insensitive: they only work for ASCII
+        // (see Issue #1347)
+        assert!(~"" == to_lower(""));
+        assert!(~"ymca" == to_lower("YMCA"));
     }
 
     #[test]
@@ -3346,15 +3360,15 @@ mod tests {
     #[test]
     fn test_shift_byte() {
         let mut s = ~"ABC";
-        let b = raw::shift_byte(&mut s);
+        let b = unsafe{raw::shift_byte(&mut s)}; // shift_byte is an unsafe fn
         assert!((s == ~"BC"));
         assert!((b == 65u8));
     }
 
     #[test]
     fn test_pop_byte() {
         let mut s = ~"ABC";
-        let b = raw::pop_byte(&mut s);
+        let b = unsafe{raw::pop_byte(&mut s)}; // pop_byte is an unsafe fn
         assert!((s == ~"AB"));
         assert!((b == 67u8));
     }
@@ -3666,12 +3680,8 @@ mod tests {
 
     #[test]
     fn test_map() {
-        unsafe {
-            assert!(~"" == map(~"", |c|
-                libc::toupper(c as c_char) as char));
-            assert!(~"YMCA" == map(~"ymca",
-                                  |c| libc::toupper(c as c_char) as char));
-        }
+        assert!(~"" == map(~"", |c| unsafe {libc::toupper(c as c_char)} as char));
+        assert!(~"YMCA" == map(~"ymca", |c| unsafe {libc::toupper(c as c_char)} as char));
     }
 
     #[test]
@@ -3685,11 +3695,11 @@ mod tests {
 
     #[test]
     fn test_any() {
-        assert!(false  == any(~"", char::is_uppercase));
+        assert!(false == any(~"", char::is_uppercase));
         assert!(false == any(~"ymca", char::is_uppercase));
         assert!(true  == any(~"YMCA", char::is_uppercase));
-        assert!(true == any(~"yMCA", char::is_uppercase));
-        assert!(true == any(~"Ymcy", char::is_uppercase));
+        assert!(true  == any(~"yMCA", char::is_uppercase));
+        assert!(true  == any(~"Ymcy", char::is_uppercase));
     }
 
     #[test]