@@ -747,6 +747,46 @@ pub trait Read {
747
747
self . utf8_chars ( )
748
748
}
749
749
750
+ /// Transforms this `Read` instance into an `Iterator` over `char`s, decoding lossily.
751
+ ///
752
+ /// This adaptor will attempt to interpret this reader as a UTF-8 encoded
753
+ /// sequence of characters. The returned iterator will return `None` once
754
+ /// EOF is reached for this reader. Otherwise each element yielded will be a
755
+ /// `Result<char, Error>`, where an `Err` carries the I/O error reported by
756
+ /// the underlying reader.
757
+ ///
758
+ /// Unlike `utf8_chars`, invalid or incomplete UTF-8 byte sequences are not
759
+ /// reported as errors: each is yielded as a U+FFFD replacement character.
760
+ ///
761
+ /// # Examples
762
+ ///
763
+ /// [`File`][file]s implement `Read`:
764
+ ///
765
+ /// [file]: ../fs/struct.File.html
766
+ ///
767
+ /// ```
768
+ /// #![feature(io)]
769
+ /// use std::io;
770
+ /// use std::io::prelude::*;
771
+ /// use std::fs::File;
772
+ ///
773
+ /// # fn foo() -> io::Result<()> {
774
+ /// let mut f = try!(File::open("foo.txt"));
775
+ ///
776
+ /// for c in f.utf8_chars_lossy() {
777
+ /// println!("{}", c.unwrap());
778
+ /// }
779
+ /// # Ok(())
780
+ /// # }
781
+ /// ```
782
+ #[ unstable( feature = "io" , reason = "the semantics of a partial read/write \
783
+ of where errors happen is currently \
784
+ unclear and may change",
785
+ issue = "27802" ) ]
786
+ fn utf8_chars_lossy ( self ) -> Utf8CharsLossy < Self > where Self : Sized {
787
+ Utf8CharsLossy { inner : self . utf8_chars ( ) }
788
+ }
789
+
750
790
/// Creates an adaptor which will chain this stream with another.
751
791
///
752
792
/// The returned `Read` instance will first read all bytes from this object
@@ -1698,6 +1738,35 @@ impl fmt::Display for Utf8CharsError {
1698
1738
}
1699
1739
}
1700
1740
1741
+ /// An iterator over the `char`s of a reader, with invalid UTF-8 replaced by U+FFFD.
1742
+ ///
1743
+ /// This struct is generally created by calling [`utf8_chars_lossy()`][utf8_chars_lossy] on a reader.
1744
+ /// Please see the documentation of `utf8_chars_lossy()` for more details.
1745
+ ///
1746
+ /// [utf8_chars_lossy]: trait.Read.html#method.utf8_chars_lossy
1747
+ #[ unstable( feature = "io" , reason = "awaiting stability of Read::utf8_chars_lossy" ,
1748
+ issue = "27802" ) ]
1749
+ pub struct Utf8CharsLossy < R > {
1750
+ inner : Utf8Chars < R > ,
1751
+ }
1752
+
1753
+ #[ unstable( feature = "io" , reason = "awaiting stability of Read::utf8_chars_lossy" ,
1754
+ issue = "27802" ) ]
1755
+ impl < R : Read > Iterator for Utf8CharsLossy < R > {
1756
+ type Item = result:: Result < char , Error > ;
1757
+
1758
+ fn next ( & mut self ) -> Option < result:: Result < char , Error > > {
1759
+ // Decoding errors from the inner iterator become U+FFFD and only I/O errors are passed on as `Err`; this follows Unicode Standard §5.22 "Best Practice for U+FFFD Substitution"
1760
+ // http://www.unicode.org/versions/Unicode8.0.0/ch05.pdf#G40630
1761
+ self . inner . next ( ) . map ( |result| match result {
1762
+ Ok ( c) => Ok ( c) ,
1763
+ Err ( Utf8CharsError :: InvalidUtf8 ) |
1764
+ Err ( Utf8CharsError :: IncompleteUtf8 ) => Ok ( '\u{FFFD}' ) ,
1765
+ Err ( Utf8CharsError :: Io ( e) ) => Err ( e) ,
1766
+ } )
1767
+ }
1768
+ }
1769
+
1701
1770
/// An iterator over the contents of an instance of `BufRead` split on a
1702
1771
/// particular byte.
1703
1772
///
@@ -1768,24 +1837,16 @@ mod tests {
1768
1837
use prelude:: v1:: * ;
1769
1838
use io:: prelude:: * ;
1770
1839
use io;
1771
- use super :: Utf8CharsError ;
1772
1840
use super :: Cursor ;
1773
1841
use test;
1774
1842
use super :: repeat;
1775
1843
1776
1844
fn chars_lossy ( bytes : & [ u8 ] ) -> String {
1777
- // Follow Unicode Standard §5.22 "Best Practice for U+FFFD Substitution"
1778
- // http://www.unicode.org/versions/Unicode8.0.0/ch05.pdf#G40630
1779
- Cursor :: new ( bytes) . utf8_chars ( ) . map ( |result| match result {
1780
- Ok ( c) => c,
1781
- Err ( Utf8CharsError :: InvalidUtf8 ) |
1782
- Err ( Utf8CharsError :: IncompleteUtf8 ) => '\u{FFFD}' ,
1783
- Err ( Utf8CharsError :: Io ( e) ) => panic ! ( "{}" , e) ,
1784
- } ) . collect ( )
1845
+ Cursor :: new ( bytes) . utf8_chars_lossy ( ) . collect :: < Result < _ , _ > > ( ) . unwrap ( ) // collects into `Result<String, _>`; an I/O error panics via `unwrap`, as the replaced `panic!` arm did
1785
1846
}
1786
1847
1787
1848
#[ test]
1788
- fn utf8_chars ( ) {
1849
+ fn utf8_chars_lossy ( ) {
1789
1850
assert_eq ! ( chars_lossy( b"\xf0 \x9f abc" ) , "�abc" ) ;
1790
1851
assert_eq ! ( chars_lossy( b"\xed \xa0 \x80 a" ) , "���a" ) ;
1791
1852
assert_eq ! ( chars_lossy( b"\xed \xa0 a" ) , "��a" ) ;
0 commit comments