Skip to content

Commit fae703e

Browse files
committed
Rename std::io::Read::chars to utf8_chars.
Unlike `str::chars`, where UTF-8 is implied because it is always the encoding of `str` (whose contents are guaranteed to be well-formed), the bytes read from `io::Read` are arbitrary. Fixes rust-lang#33761.
1 parent bffa87b commit fae703e

File tree

3 files changed

+48
-40
lines changed

3 files changed

+48
-40
lines changed

src/libstd/io/buffered.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,14 +1076,14 @@ mod tests {
10761076
fn read_char_buffered() {
10771077
let buf = [195, 159];
10781078
let reader = BufReader::with_capacity(1, &buf[..]);
1079-
assert_eq!(reader.chars().next().unwrap().unwrap(), 'ß');
1079+
assert_eq!(reader.utf8_chars().next().unwrap().unwrap(), 'ß');
10801080
}
10811081

10821082
#[test]
10831083
fn test_chars() {
10841084
let buf = [195, 159, b'a'];
10851085
let reader = BufReader::with_capacity(1, &buf[..]);
1086-
let mut it = reader.chars();
1086+
let mut it = reader.utf8_chars();
10871087
assert_eq!(it.next().unwrap().unwrap(), 'ß');
10881088
assert_eq!(it.next().unwrap().unwrap(), 'a');
10891089
assert!(it.next().is_none());

src/libstd/io/cursor.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,7 @@ mod tests {
481481
#[test]
482482
fn test_read_char() {
483483
let b = &b"Vi\xE1\xBB\x87t"[..];
484-
let mut c = Cursor::new(b).chars();
484+
let mut c = Cursor::new(b).utf8_chars();
485485
assert_eq!(c.next().unwrap().unwrap(), 'V');
486486
assert_eq!(c.next().unwrap().unwrap(), 'i');
487487
assert_eq!(c.next().unwrap().unwrap(), 'ệ');
@@ -492,7 +492,7 @@ mod tests {
492492
#[test]
493493
fn test_read_bad_char() {
494494
let b = &b"\x80"[..];
495-
let mut c = Cursor::new(b).chars();
495+
let mut c = Cursor::new(b).utf8_chars();
496496
assert!(c.next().unwrap().is_err());
497497
}
498498

src/libstd/io/mod.rs

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,7 @@ pub trait Read {
726726
/// # fn foo() -> io::Result<()> {
727727
/// let mut f = try!(File::open("foo.txt"));
728728
///
729-
/// for c in f.chars() {
729+
/// for c in f.utf8_chars() {
730730
/// println!("{}", c.unwrap());
731731
/// }
732732
/// # Ok(())
@@ -736,8 +736,15 @@ pub trait Read {
736736
of where errors happen is currently \
737737
unclear and may change",
738738
issue = "27802")]
739-
fn chars(self) -> Chars<Self> where Self: Sized {
740-
Chars { inner: self, buffer: None }
739+
fn utf8_chars(self) -> Utf8Chars<Self> where Self: Sized {
740+
Utf8Chars { inner: self, buffer: None }
741+
}
742+
743+
/// Former name of the `utf8_chars` method.
744+
#[rustc_deprecated(since = "1.10.0", reason = "renamed to `utf8_chars`")]
745+
#[unstable(feature = "io", reason = "renamed while unstable", issue = "27802")]
746+
fn chars(self) -> Utf8Chars<Self> where Self: Sized {
747+
self.utf8_chars()
741748
}
742749

743750
/// Creates an adaptor which will chain this stream with another.
@@ -1547,23 +1554,23 @@ impl<R: Read> Iterator for Bytes<R> {
15471554

15481555
/// An iterator over the `char`s of a reader.
15491556
///
1550-
/// This struct is generally created by calling [`chars()`][chars] on a reader.
1551-
/// Please see the documentation of `chars()` for more details.
1557+
/// This struct is generally created by calling [`utf8_chars()`][utf8_chars] on a reader.
1558+
/// Please see the documentation of `utf8_chars()` for more details.
15521559
///
1553-
/// [chars]: trait.Read.html#method.chars
1554-
#[unstable(feature = "io", reason = "awaiting stability of Read::chars",
1560+
/// [utf8_chars]: trait.Read.html#method.utf8_chars
1561+
#[unstable(feature = "io", reason = "awaiting stability of Read::utf8_chars",
15551562
issue = "27802")]
1556-
pub struct Chars<R> {
1563+
pub struct Utf8Chars<R> {
15571564
inner: R,
15581565
buffer: Option<u8>,
15591566
}
15601567

1561-
/// An enumeration of possible errors that can be generated from the `Chars`
1568+
/// An enumeration of possible errors that can be generated from the `Utf8Chars`
15621569
/// adapter.
15631570
#[derive(Debug)]
1564-
#[unstable(feature = "io", reason = "awaiting stability of Read::chars",
1571+
#[unstable(feature = "io", reason = "awaiting stability of Read::utf8_chars",
15651572
issue = "27802")]
1566-
pub enum CharsError {
1573+
pub enum Utf8CharsError {
15671574
/// Variant representing that the underlying stream was read successfully
15681575
/// but contains a byte sequence ill-formed in UTF-8.
15691576
InvalidUtf8,
@@ -1576,12 +1583,12 @@ pub enum CharsError {
15761583
Io(Error),
15771584
}
15781585

1579-
#[unstable(feature = "io", reason = "awaiting stability of Read::chars",
1586+
#[unstable(feature = "io", reason = "awaiting stability of Read::utf8_chars",
15801587
issue = "27802")]
1581-
impl<R: Read> Iterator for Chars<R> {
1582-
type Item = result::Result<char, CharsError>;
1588+
impl<R: Read> Iterator for Utf8Chars<R> {
1589+
type Item = result::Result<char, Utf8CharsError>;
15831590

1584-
fn next(&mut self) -> Option<result::Result<char, CharsError>> {
1591+
fn next(&mut self) -> Option<result::Result<char, Utf8CharsError>> {
15851592
let mut buf = [0];
15861593
macro_rules! read_byte {
15871594
(EOF => $on_eof: expr) => {
@@ -1591,7 +1598,7 @@ impl<R: Read> Iterator for Chars<R> {
15911598
Ok(0) => $on_eof,
15921599
Ok(..) => break,
15931600
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
1594-
Err(e) => return Some(Err(CharsError::Io(e))),
1601+
Err(e) => return Some(Err(Utf8CharsError::Io(e))),
15951602
}
15961603
}
15971604
buf[0]
@@ -1607,11 +1614,11 @@ impl<R: Read> Iterator for Chars<R> {
16071614
macro_rules! continuation_byte {
16081615
($range: pat) => {
16091616
{
1610-
match read_byte!(EOF => return Some(Err(CharsError::IncompleteUtf8))) {
1617+
match read_byte!(EOF => return Some(Err(Utf8CharsError::IncompleteUtf8))) {
16111618
byte @ $range => (byte & 0b0011_1111) as u32,
16121619
byte => {
16131620
self.buffer = Some(byte);
1614-
return Some(Err(CharsError::InvalidUtf8))
1621+
return Some(Err(Utf8CharsError::InvalidUtf8))
16151622
}
16161623
}
16171624
}
@@ -1647,46 +1654,46 @@ impl<R: Read> Iterator for Chars<R> {
16471654
let fourth = continuation_byte!(0x80...0xBF);
16481655
((first & 0b0000_0111) as u32) << 18 | second << 12 | third << 6 | fourth
16491656
}
1650-
_ => return Some(Err(CharsError::InvalidUtf8))
1657+
_ => return Some(Err(Utf8CharsError::InvalidUtf8))
16511658
};
16521659
unsafe {
16531660
Some(Ok(char::from_u32_unchecked(code_point)))
16541661
}
16551662
}
16561663
}
16571664

1658-
#[unstable(feature = "io", reason = "awaiting stability of Read::chars",
1665+
#[unstable(feature = "io", reason = "awaiting stability of Read::utf8_chars",
16591666
issue = "27802")]
1660-
impl std_error::Error for CharsError {
1667+
impl std_error::Error for Utf8CharsError {
16611668
fn description(&self) -> &str {
16621669
match *self {
1663-
CharsError::InvalidUtf8 => "invalid UTF-8 byte sequence",
1664-
CharsError::IncompleteUtf8 => {
1670+
Utf8CharsError::InvalidUtf8 => "invalid UTF-8 byte sequence",
1671+
Utf8CharsError::IncompleteUtf8 => {
16651672
"stream ended in the middle of an UTF-8 byte sequence"
16661673
}
1667-
CharsError::Io(ref e) => std_error::Error::description(e),
1674+
Utf8CharsError::Io(ref e) => std_error::Error::description(e),
16681675
}
16691676
}
16701677
fn cause(&self) -> Option<&std_error::Error> {
16711678
match *self {
1672-
CharsError::InvalidUtf8 | CharsError::IncompleteUtf8 => None,
1673-
CharsError::Io(ref e) => e.cause(),
1679+
Utf8CharsError::InvalidUtf8 | Utf8CharsError::IncompleteUtf8 => None,
1680+
Utf8CharsError::Io(ref e) => e.cause(),
16741681
}
16751682
}
16761683
}
16771684

1678-
#[unstable(feature = "io", reason = "awaiting stability of Read::chars",
1685+
#[unstable(feature = "io", reason = "awaiting stability of Read::utf8_chars",
16791686
issue = "27802")]
1680-
impl fmt::Display for CharsError {
1687+
impl fmt::Display for Utf8CharsError {
16811688
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
16821689
match *self {
1683-
CharsError::InvalidUtf8 => {
1690+
Utf8CharsError::InvalidUtf8 => {
16841691
"invalid UTF-8 byte sequence".fmt(f)
16851692
}
1686-
CharsError::IncompleteUtf8 => {
1693+
Utf8CharsError::IncompleteUtf8 => {
16871694
"stream ended in the middle of an UTF-8 byte sequence".fmt(f)
16881695
}
1689-
CharsError::Io(ref e) => e.fmt(f),
1696+
Utf8CharsError::Io(ref e) => e.fmt(f),
16901697
}
16911698
}
16921699
}
@@ -1761,23 +1768,24 @@ mod tests {
17611768
use prelude::v1::*;
17621769
use io::prelude::*;
17631770
use io;
1764-
use super::CharsError;
1771+
use super::Utf8CharsError;
17651772
use super::Cursor;
17661773
use test;
17671774
use super::repeat;
17681775

17691776
fn chars_lossy(bytes: &[u8]) -> String {
17701777
// Follow Unicode Standard §5.22 "Best Practice for U+FFFD Substitution"
17711778
// http://www.unicode.org/versions/Unicode8.0.0/ch05.pdf#G40630
1772-
Cursor::new(bytes).chars().map(|result| match result {
1779+
Cursor::new(bytes).utf8_chars().map(|result| match result {
17731780
Ok(c) => c,
1774-
Err(CharsError::InvalidUtf8) | Err(CharsError::IncompleteUtf8) => '\u{FFFD}',
1775-
Err(CharsError::Io(e)) => panic!("{}", e),
1781+
Err(Utf8CharsError::InvalidUtf8) |
1782+
Err(Utf8CharsError::IncompleteUtf8) => '\u{FFFD}',
1783+
Err(Utf8CharsError::Io(e)) => panic!("{}", e),
17761784
}).collect()
17771785
}
17781786

17791787
#[test]
1780-
fn chars() {
1788+
fn utf8_chars() {
17811789
assert_eq!(chars_lossy(b"\xf0\x9fabc"), "�abc");
17821790
assert_eq!(chars_lossy(b"\xed\xa0\x80a"), "���a");
17831791
assert_eq!(chars_lossy(b"\xed\xa0a"), "��a");

0 commit comments

Comments
 (0)