Skip to content

Commit 4908017

Browse files
committed
std: Stabilize the std::str module
This commit starts out by consolidating all `str` extension traits into one `StrExt` trait to be included in the prelude. This means that `UnicodeStrPrelude`, `StrPrelude`, and `StrAllocating` have all been merged into one `StrExt` exported by the standard library. Some functionality is currently duplicated with the `StrExt` present in libcore. This commit also currently avoids any methods which require any form of pattern to operate. These functions will be stabilized via a separate RFC. Next, the stability of methods and structures is as follows: Stable * from_utf8_unchecked * CowString - after moving to std::string * StrExt::as_bytes * StrExt::as_ptr * StrExt::bytes/Bytes - also made a struct instead of a typedef * StrExt::char_indices/CharIndices - CharOffsets was renamed * StrExt::chars/Chars * StrExt::is_empty * StrExt::len * StrExt::lines/Lines * StrExt::lines_any/LinesAny * StrExt::slice_unchecked * StrExt::trim * StrExt::trim_left * StrExt::trim_right * StrExt::words/Words - also made a struct instead of a typedef Unstable * from_utf8 - the return type was changed to a `Result`, but the error type has yet to prove itself * from_c_str - this function will be handled by the c_str RFC * FromStr - this trait will have an associated error type eventually * StrExt::escape_default - needs iterators at least, unsure if it should make the cut * StrExt::escape_unicode - needs iterators at least, unsure if it should make the cut * StrExt::slice_chars - this function has yet to prove itself * StrExt::slice_shift_char - awaiting conventions about slicing and shifting * StrExt::graphemes/Graphemes - this functionality may only be in libunicode * StrExt::grapheme_indices/GraphemeIndices - this functionality may only be in libunicode * StrExt::width - this functionality may only be in libunicode * StrExt::utf16_units - this functionality may only be in libunicode * StrExt::nfd_chars - this functionality may only be in libunicode * StrExt::nfkd_chars - this functionality may 
only be in libunicode * StrExt::nfc_chars - this functionality may only be in libunicode * StrExt::nfkc_chars - this functionality may only be in libunicode * StrExt::is_char_boundary - naming is uncertain with container conventions * StrExt::char_range_at - naming is uncertain with container conventions * StrExt::char_range_at_reverse - naming is uncertain with container conventions * StrExt::char_at - naming is uncertain with container conventions * StrExt::char_at_reverse - naming is uncertain with container conventions * StrVector::concat - this functionality may be replaced with iterators, but it's not certain at this time * StrVector::connect - as with concat, may be deprecated in favor of iterators Deprecated * StrAllocating and UnicodeStrPrelude have been merged into StrExt * eq_slice - compiler implementation detail * from_str - use the inherent parse() method * is_utf8 - call from_utf8 instead * replace - call the method instead * truncate_utf16_at_nul - this is an implementation detail of Windows and does not need to be exposed. * utf8_char_width - moved to libunicode * utf16_items - moved to libunicode * is_utf16 - moved to libunicode * Utf16Items - moved to libunicode * Utf16Item - moved to libunicode * Utf16Encoder - moved to libunicode * AnyLines - renamed to LinesAny and made a struct * SendStr - use CowString<'static> instead * str::raw - all functionality is deprecated * StrExt::into_string - call to_string() instead * StrExt::repeat - use iterators instead * StrExt::char_len - use .chars().count() instead * StrExt::is_alphanumeric - use .chars().all(..) * StrExt::is_whitespace - use .chars().all(..) Pending deprecation -- while slicing syntax is being worked out, these methods are all #[unstable] * Str - while currently used for generic programming, this trait will be replaced with one of [], deref coercions, or a generic conversion trait. 
* StrExt::slice - use slicing syntax instead * StrExt::slice_to - use slicing syntax instead * StrExt::slice_from - use slicing syntax instead * StrExt::lev_distance - deprecated with no replacement Awaiting stabilization due to patterns and/or matching * StrExt::contains * StrExt::contains_char * StrExt::split * StrExt::splitn * StrExt::split_terminator * StrExt::rsplitn * StrExt::match_indices * StrExt::split_str * StrExt::starts_with * StrExt::ends_with * StrExt::trim_chars * StrExt::trim_left_chars * StrExt::trim_right_chars * StrExt::find * StrExt::rfind * StrExt::find_str * StrExt::subslice_offset
1 parent 34d6800 commit 4908017

File tree

15 files changed

+1511
-1078
lines changed

15 files changed

+1511
-1078
lines changed

src/libcollections/str.rs

+983-139
Large diffs are not rendered by default.

src/libcollections/string.rs

+40-34
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,12 @@ use core::hash;
2121
use core::mem;
2222
use core::ptr;
2323
use core::ops;
24-
// FIXME: ICE's abound if you import the `Slice` type while importing `Slice` trait
2524
use core::raw::Slice as RawSlice;
25+
use unicode::str as unicode_str;
26+
use unicode::str::Utf16Item;
2627

2728
use slice::CloneSliceExt;
28-
use str;
29-
use str::{CharRange, CowString, FromStr, StrAllocating};
30-
use str::MaybeOwned::Owned;
29+
use str::{mod, CharRange, FromStr, StrExt, Owned, Utf8Error};
3130
use vec::{DerefVec, Vec, as_vec};
3231

3332
/// A growable string stored as a UTF-8 encoded buffer.
@@ -87,8 +86,10 @@ impl String {
8786
/// Returns the vector as a string buffer, if possible, taking care not to
8887
/// copy it.
8988
///
90-
/// Returns `Err` with the original vector if the vector contains invalid
91-
/// UTF-8.
89+
/// # Failure
90+
///
91+
/// If the given vector is not valid UTF-8, then the original vector and the
92+
/// corresponding error is returned.
9293
///
9394
/// # Examples
9495
///
@@ -103,11 +104,10 @@ impl String {
103104
/// ```
104105
#[inline]
105106
#[unstable = "error type may change"]
106-
pub fn from_utf8(vec: Vec<u8>) -> Result<String, Vec<u8>> {
107-
if str::is_utf8(vec.as_slice()) {
108-
Ok(String { vec: vec })
109-
} else {
110-
Err(vec)
107+
pub fn from_utf8(vec: Vec<u8>) -> Result<String, (Vec<u8>, Utf8Error)> {
108+
match str::from_utf8(vec.as_slice()) {
109+
Ok(..) => Ok(String { vec: vec }),
110+
Err(e) => Err((vec, e))
111111
}
112112
}
113113

@@ -123,8 +123,9 @@ impl String {
123123
/// ```
124124
#[unstable = "return type may change"]
125125
pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
126-
if str::is_utf8(v) {
127-
return Cow::Borrowed(unsafe { mem::transmute(v) })
126+
match str::from_utf8(v) {
127+
Ok(s) => return Cow::Borrowed(s),
128+
Err(..) => {}
128129
}
129130

130131
static TAG_CONT_U8: u8 = 128u8;
@@ -173,7 +174,7 @@ impl String {
173174
if byte < 128u8 {
174175
// subseqidx handles this
175176
} else {
176-
let w = str::utf8_char_width(byte);
177+
let w = unicode_str::utf8_char_width(byte);
177178

178179
match w {
179180
2 => {
@@ -235,7 +236,7 @@ impl String {
235236
res.as_mut_vec().push_all(v[subseqidx..total])
236237
};
237238
}
238-
Cow::Owned(res.into_string())
239+
Cow::Owned(res)
239240
}
240241

241242
/// Decode a UTF-16 encoded vector `v` into a `String`, returning `None`
@@ -256,10 +257,10 @@ impl String {
256257
#[unstable = "error value in return may change"]
257258
pub fn from_utf16(v: &[u16]) -> Option<String> {
258259
let mut s = String::with_capacity(v.len());
259-
for c in str::utf16_items(v) {
260+
for c in unicode_str::utf16_items(v) {
260261
match c {
261-
str::ScalarValue(c) => s.push(c),
262-
str::LoneSurrogate(_) => return None
262+
Utf16Item::ScalarValue(c) => s.push(c),
263+
Utf16Item::LoneSurrogate(_) => return None
263264
}
264265
}
265266
Some(s)
@@ -281,7 +282,7 @@ impl String {
281282
/// ```
282283
#[stable]
283284
pub fn from_utf16_lossy(v: &[u16]) -> String {
284-
str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
285+
unicode_str::utf16_items(v).map(|c| c.to_char_lossy()).collect()
285286
}
286287

287288
/// Convert a vector of `char`s to a `String`.
@@ -812,21 +813,12 @@ impl<'a, 'b> PartialEq<CowString<'a>> for &'b str {
812813
}
813814

814815
#[experimental = "waiting on Str stabilization"]
816+
#[allow(deprecated)]
815817
impl Str for String {
816818
#[inline]
817819
#[stable]
818820
fn as_slice<'a>(&'a self) -> &'a str {
819-
unsafe {
820-
mem::transmute(self.vec.as_slice())
821-
}
822-
}
823-
}
824-
825-
#[experimental = "waiting on StrAllocating stabilization"]
826-
impl StrAllocating for String {
827-
#[inline]
828-
fn into_string(self) -> String {
829-
self
821+
unsafe { mem::transmute(self.vec.as_slice()) }
830822
}
831823
}
832824

@@ -841,15 +833,15 @@ impl Default for String {
841833
#[experimental = "waiting on Show stabilization"]
842834
impl fmt::Show for String {
843835
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
844-
self.as_slice().fmt(f)
836+
(*self).fmt(f)
845837
}
846838
}
847839

848840
#[experimental = "waiting on Hash stabilization"]
849841
impl<H: hash::Writer> hash::Hash<H> for String {
850842
#[inline]
851843
fn hash(&self, hasher: &mut H) {
852-
self.as_slice().hash(hasher)
844+
(*self).hash(hasher)
853845
}
854846
}
855847

@@ -873,7 +865,7 @@ impl<'a> Add<&'a str, String> for String {
873865
impl ops::Slice<uint, str> for String {
874866
#[inline]
875867
fn as_slice_<'a>(&'a self) -> &'a str {
876-
self.as_slice()
868+
unsafe { mem::transmute(self.vec.as_slice()) }
877869
}
878870

879871
#[inline]
@@ -894,7 +886,9 @@ impl ops::Slice<uint, str> for String {
894886

895887
#[experimental = "waiting on Deref stabilization"]
896888
impl ops::Deref<str> for String {
897-
fn deref<'a>(&'a self) -> &'a str { self.as_slice() }
889+
fn deref<'a>(&'a self) -> &'a str {
890+
unsafe { mem::transmute(self.vec[]) }
891+
}
898892
}
899893

900894
/// Wrapper type providing a `&String` reference via `Deref`.
@@ -1015,6 +1009,18 @@ pub mod raw {
10151009
}
10161010
}
10171011

1012+
/// A clone-on-write string
1013+
#[stable]
1014+
pub type CowString<'a> = Cow<'a, String, str>;
1015+
1016+
#[allow(deprecated)]
1017+
impl<'a> Str for CowString<'a> {
1018+
#[inline]
1019+
fn as_slice<'b>(&'b self) -> &'b str {
1020+
(**self).as_slice()
1021+
}
1022+
}
1023+
10181024
#[cfg(test)]
10191025
mod tests {
10201026
use prelude::*;

src/libcore/fmt/float.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use num::cast;
2323
use ops::FnOnce;
2424
use result::Result::Ok;
2525
use slice::{mod, SliceExt};
26-
use str::StrPrelude;
26+
use str::StrExt;
2727

2828
/// A flag that specifies whether to use exponential (scientific) notation.
2929
pub enum ExponentFormat {

src/libcore/fmt/mod.rs

+14-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use result::Result::{Ok, Err};
2424
use result;
2525
use slice::SliceExt;
2626
use slice;
27-
use str::StrPrelude;
27+
use str::{StrExt, Utf8Error};
2828

2929
pub use self::num::radix;
3030
pub use self::num::Radix;
@@ -795,5 +795,18 @@ impl<'b, T: Show> Show for RefMut<'b, T> {
795795
}
796796
}
797797

798+
impl Show for Utf8Error {
799+
fn fmt(&self, f: &mut Formatter) -> Result {
800+
match *self {
801+
Utf8Error::InvalidByte(n) => {
802+
write!(f, "invalid utf-8: invalid byte at index {}", n)
803+
}
804+
Utf8Error::TooShort => {
805+
write!(f, "invalid utf-8: byte slice too short")
806+
}
807+
}
808+
}
809+
}
810+
798811
// If you expected tests to be here, look instead at the run-pass/ifmt.rs test,
799812
// it's a lot easier than creating all of the rt::Piece structures here.

src/libcore/num/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use ops::{Add, Sub, Mul, Div, Rem, Neg};
3232
use ops::{Not, BitAnd, BitOr, BitXor, Shl, Shr};
3333
use option::Option;
3434
use option::Option::{Some, None};
35-
use str::{FromStr, from_str, StrPrelude};
35+
use str::{FromStr, from_str, StrExt};
3636

3737
/// Simultaneous division and remainder
3838
#[inline]

src/libcore/prelude.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ pub use option::Option::{Some, None};
6060
pub use ptr::RawPtr;
6161
pub use result::Result;
6262
pub use result::Result::{Ok, Err};
63-
pub use str::{Str, StrPrelude};
63+
pub use str::{Str, StrExt};
6464
pub use tuple::{Tuple1, Tuple2, Tuple3, Tuple4};
6565
pub use tuple::{Tuple5, Tuple6, Tuple7, Tuple8};
6666
pub use tuple::{Tuple9, Tuple10, Tuple11, Tuple12};

0 commit comments

Comments
 (0)