@@ -621,8 +621,7 @@ enum NormalizationForm {
621
621
#[ deriving( Clone ) ]
622
622
struct NormalizationIterator < ' self > {
623
623
priv kind : NormalizationForm ,
624
- priv index : uint ,
625
- priv string : & ' self str ,
624
+ priv iter : CharIterator < ' self > ,
626
625
priv buffer : ~[ ( char , u8 ) ] ,
627
626
priv sorted : bool
628
627
}
@@ -650,16 +649,17 @@ impl<'self> Iterator<char> for NormalizationIterator<'self> {
650
649
NFKD => char:: decompose_compatible
651
650
} ;
652
651
653
- while !self . sorted && self . index < self . string . len ( ) {
654
- let CharRange { ch, next} = self . string . char_range_at ( self . index ) ;
655
- self . index = next;
656
- do decomposer( ch) |d| {
657
- let class = canonical_combining_class ( d) ;
658
- if class == 0 && !self . sorted {
659
- canonical_sort ( self . buffer ) ;
660
- self . sorted = true ;
652
+ if !self . sorted {
653
+ for ch in self . iter {
654
+ do decomposer ( ch) |d| {
655
+ let class = canonical_combining_class ( d) ;
656
+ if class == 0 && !self . sorted {
657
+ canonical_sort ( self . buffer ) ;
658
+ self . sorted = true ;
659
+ }
660
+ self . buffer . push ( ( d, class) ) ;
661
661
}
662
- self . buffer . push ( ( d , class ) ) ;
662
+ if self . sorted { break }
663
663
}
664
664
}
665
665
@@ -678,7 +678,10 @@ impl<'self> Iterator<char> for NormalizationIterator<'self> {
678
678
}
679
679
}
680
680
681
- fn size_hint ( & self ) -> ( uint , Option < uint > ) { ( self . string . len ( ) , None ) }
681
+ fn size_hint ( & self ) -> ( uint , Option < uint > ) {
682
+ let ( lower, _) = self . iter . size_hint ( ) ;
683
+ ( lower, None )
684
+ }
682
685
}
683
686
684
687
/// Replace all occurrences of one string with another
@@ -1588,8 +1591,7 @@ impl<'self> StrSlice<'self> for &'self str {
1588
1591
/// Returns the string in Unicode Normalization Form D (canonical decomposition)
1589
1592
fn nfd_iter(&self) -> NormalizationIterator<'self> {
1590
1593
NormalizationIterator {
1591
- index: 0,
1592
- string: *self,
1594
+ iter: self.iter(),
1593
1595
buffer: ~[],
1594
1596
sorted: false,
1595
1597
kind: NFD
@@ -1599,8 +1601,7 @@ impl<'self> StrSlice<'self> for &'self str {
1599
1601
/// Returns the string in Unicode Normalization Form KD (compatibility decomposition)
1600
1602
fn nfkd_iter(&self) -> NormalizationIterator<'self> {
1601
1603
NormalizationIterator {
1602
- index: 0,
1603
- string: *self,
1604
+ iter: self.iter(),
1604
1605
buffer: ~[],
1605
1606
sorted: false,
1606
1607
kind: NFKD
@@ -1672,6 +1673,7 @@ impl<'self> StrSlice<'self> for &'self str {
1672
1673
if count == end { end_byte = Some(idx); break; }
1673
1674
count += 1;
1674
1675
}
1676
+ if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
1675
1677
if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
1676
1678
1677
1679
match (begin_byte, end_byte) {
@@ -2659,8 +2661,11 @@ mod tests {
2659
2661
fn t(a: &str, b: &str, start: uint) {
2660
2662
assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2661
2663
}
2664
+ t(" ", " ", 0);
2662
2665
t(" hello", " llo", 2);
2663
2666
t(" hello", " el", 1);
2667
+ t(" αβλ", " β", 1);
2668
+ t(" αβλ", " ", 3);
2664
2669
assert_eq!(" ะเทศไท", " ประเทศไทย中华Việt Nam ".slice_chars(2, 8));
2665
2670
}
2666
2671
0 commit comments