Skip to content

Commit 4de9bca

Browse files
author
blake2-ppc
committed
std::str: Tune CharSplitIterator after benchmarks
Implement Huon Wilson's suggestions (since the benchmarks agree!). Use `self.sep.matches(byte as char) && byte < 128u8` to match in the `only_ascii` case so that mistaken matches outside the ASCII range can't create invalid substrings. Put the conditional on `only_ascii` outside the loop.
1 parent 413f868 commit 4de9bca

File tree

1 file changed

+44
-55
lines changed

1 file changed

+44
-55
lines changed

src/libstd/str.rs

+44-55
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@ use char;
2121
use char::Char;
2222
use clone::{Clone, DeepClone};
2323
use container::{Container, Mutable};
24-
use either::{Left, Right};
2524
use iter::Times;
2625
use iterator::{Iterator, FromIterator, Extendable};
2726
use iterator::{Filter, AdditiveIterator, Map};
@@ -411,36 +410,30 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
411410
fn next(&mut self) -> Option<&'self str> {
412411
if self.finished { return None }
413412

414-
let len = self.string.len();
415-
let mut iter = match self.only_ascii {
416-
true => Left(self.string.byte_iter().enumerate()),
417-
false => Right(self.string.char_offset_iter())
418-
};
419-
420-
loop {
421-
let (idx, next) = match iter {
422-
// this gives a *huge* speed up for splitting on ASCII
423-
// characters (e.g. '\n' or ' ')
424-
Left(ref mut it) => match it.next() {
425-
Some((idx, byte)) if byte < 128u8 && self.sep.matches(byte as char) =>
426-
(idx, idx + 1),
427-
Some(*) => loop,
428-
None => break,
429-
},
430-
Right(ref mut it) => match it.next() {
431-
Some((idx, ch)) if self.sep.matches(ch) =>
432-
(idx, self.string.char_range_at(idx).next),
433-
Some(*) => loop,
434-
None => break,
413+
let mut next_split = None;
414+
if self.only_ascii {
415+
for (idx, byte) in self.string.byte_iter().enumerate() {
416+
if self.sep.matches(byte as char) && byte < 128u8 {
417+
next_split = Some((idx, idx + 1));
418+
break;
435419
}
436-
};
437-
unsafe {
438-
let elt = raw::slice_bytes(self.string, 0, idx);
439-
self.string = raw::slice_bytes(self.string, next, len);
440-
return Some(elt)
441420
}
421+
} else {
422+
for (idx, ch) in self.string.char_offset_iter() {
423+
if self.sep.matches(ch) {
424+
next_split = Some((idx, self.string.char_range_at(idx).next));
425+
break;
426+
}
427+
}
428+
}
429+
match next_split {
430+
Some((a, b)) => unsafe {
431+
let elt = raw::slice_unchecked(self.string, 0, a);
432+
self.string = raw::slice_unchecked(self.string, b, self.string.len());
433+
Some(elt)
434+
},
435+
None => self.get_end(),
442436
}
443-
self.get_end()
444437
}
445438
}
446439

@@ -458,36 +451,32 @@ for CharSplitIterator<'self, Sep> {
458451
}
459452
}
460453
let len = self.string.len();
461-
let mut iter = match self.only_ascii {
462-
true => Left(self.string.byte_rev_iter().enumerate()),
463-
false => Right(self.string.char_offset_iter())
464-
};
465-
466-
loop {
467-
let (idx, next) = match iter {
468-
Left(ref mut it) => match it.next() {
469-
Some((j, byte)) if byte < 128u8 && self.sep.matches(byte as char) => {
470-
let idx = self.string.len() - j - 1;
471-
(idx, idx + 1)
472-
},
473-
Some(*) => loop,
474-
None => break,
475-
},
476-
Right(ref mut it) => match it.next_back() {
477-
Some((idx, ch)) if self.sep.matches(ch) =>
478-
(idx, self.string.char_range_at(idx).next),
479-
Some(*) => loop,
480-
None => break,
454+
let mut next_split = None;
455+
456+
if self.only_ascii {
457+
for (j, byte) in self.string.byte_rev_iter().enumerate() {
458+
if self.sep.matches(byte as char) && byte < 128u8 {
459+
let idx = len - j - 1;
460+
next_split = Some((idx, idx + 1));
461+
break;
481462
}
482-
};
483-
unsafe {
484-
let elt = raw::slice_bytes(self.string, next, len);
485-
self.string = raw::slice_bytes(self.string, 0, idx);
486-
return Some(elt)
487463
}
464+
} else {
465+
for (idx, ch) in self.string.char_offset_rev_iter() {
466+
if self.sep.matches(ch) {
467+
next_split = Some((idx, self.string.char_range_at(idx).next));
468+
break;
469+
}
470+
}
471+
}
472+
match next_split {
473+
Some((a, b)) => unsafe {
474+
let elt = raw::slice_unchecked(self.string, b, len);
475+
self.string = raw::slice_unchecked(self.string, 0, a);
476+
Some(elt)
477+
},
478+
None => { self.finished = true; Some(self.string) }
488479
}
489-
self.finished = true;
490-
Some(self.string)
491480
}
492481
}
493482

0 commit comments

Comments
 (0)