16
16
17
17
use mem;
18
18
use char;
19
+ use char:: Char ;
19
20
use clone:: Clone ;
20
21
use cmp;
21
22
use cmp:: { PartialEq , Eq } ;
@@ -24,7 +25,7 @@ use default::Default;
24
25
use iter:: { Filter , Map , Iterator } ;
25
26
use iter:: { DoubleEndedIterator , ExactSize } ;
26
27
use iter:: range;
27
- use num:: Saturating ;
28
+ use num:: { CheckedMul , Saturating } ;
28
29
use option:: { None , Option , Some } ;
29
30
use raw:: Repr ;
30
31
use slice:: ImmutableVector ;
@@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
557
558
}
558
559
}
559
560
561
+ /// External iterator for a string's UTF16 codeunits.
562
+ /// Use with the `std::iter` module.
563
+ #[ deriving( Clone ) ]
564
+ pub struct Utf16CodeUnits < ' a > {
565
+ chars : Chars < ' a > ,
566
+ extra : u16
567
+ }
568
+
569
+ impl < ' a > Iterator < u16 > for Utf16CodeUnits < ' a > {
570
+ #[ inline]
571
+ fn next ( & mut self ) -> Option < u16 > {
572
+ if self . extra != 0 {
573
+ let tmp = self . extra ;
574
+ self . extra = 0 ;
575
+ return Some ( tmp) ;
576
+ }
577
+
578
+ let mut buf = [ 0u16 , ..2 ] ;
579
+ self . chars . next ( ) . map ( |ch| {
580
+ let n = ch. encode_utf16 ( buf /* as mut slice! */ ) ;
581
+ if n == 2 { self . extra = buf[ 1 ] ; }
582
+ buf[ 0 ]
583
+ } )
584
+ }
585
+
586
+ #[ inline]
587
+ fn size_hint ( & self ) -> ( uint , Option < uint > ) {
588
+ let ( low, high) = self . chars . size_hint ( ) ;
589
+ // every char gets either one u16 or two u16,
590
+ // so this iterator is between 1 or 2 times as
591
+ // long as the underlying iterator.
592
+ ( low, high. and_then ( |n| n. checked_mul ( & 2 ) ) )
593
+ }
594
+ }
595
+
560
596
/*
561
597
Section: Comparing strings
562
598
*/
@@ -1619,6 +1655,9 @@ pub trait StrSlice<'a> {
1619
1655
/// and that it is not reallocated (e.g. by pushing to the
1620
1656
/// string).
1621
1657
fn as_ptr ( & self ) -> * const u8 ;
1658
+
1659
+ /// Return an iterator of `u16` over the string encoded as UTF-16.
1660
+ fn utf16_units ( & self ) -> Utf16CodeUnits < ' a > ;
1622
1661
}
1623
1662
1624
1663
impl < ' a > StrSlice < ' a > for & ' a str {
@@ -1967,6 +2006,11 @@ impl<'a> StrSlice<'a> for &'a str {
1967
2006
fn as_ptr ( & self ) -> * const u8 {
1968
2007
self . repr ( ) . data
1969
2008
}
2009
+
2010
+ #[ inline]
2011
+ fn utf16_units ( & self ) -> Utf16CodeUnits < ' a > {
2012
+ Utf16CodeUnits { chars : self . chars ( ) , extra : 0 }
2013
+ }
1970
2014
}
1971
2015
1972
2016
impl < ' a > Default for & ' a str {
0 commit comments