Skip to content

Commit ddd1892

Browse files
committed
remove the funny enum
1 parent 15164ed commit ddd1892

File tree

1 file changed

+46
-158
lines changed

1 file changed

+46
-158
lines changed

library/core/src/str/pattern.rs

Lines changed: 46 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -364,91 +364,9 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364364
// Impl for char
365365
/////////////////////////////////////////////////////////////////////////////
366366

367-
#[derive(Clone, Debug)]
368-
/// hah
369-
pub struct AsciiCharSearcher<'a> {
370-
haystack: &'a str,
371-
needle: u8,
372-
finger: usize,
373-
finger_back: usize,
374-
}
375-
376-
unsafe impl<'a> Searcher<'a> for AsciiCharSearcher<'a> {
377-
fn haystack(&self) -> &'a str {
378-
self.haystack
379-
}
380-
381-
#[inline]
382-
fn next(&mut self) -> SearchStep {
383-
let byte = self.haystack.as_bytes().get(self.finger);
384-
if let Some(&byte) = byte {
385-
self.finger += 1;
386-
if byte == self.needle {
387-
SearchStep::Match(self.finger - 1, self.finger)
388-
} else {
389-
SearchStep::Reject(self.finger - 1, self.finger)
390-
}
391-
} else {
392-
SearchStep::Done
393-
}
394-
}
395-
396-
#[inline(always)]
397-
fn next_match(&mut self) -> Option<(usize, usize)> {
398-
match unsafe { self.haystack.as_bytes().get_unchecked(self.finger..self.finger_back) }
399-
.iter()
400-
.position(|x| *x == self.needle)
401-
{
402-
Some(x) => {
403-
self.finger += x + 1;
404-
Some((self.finger - 1, self.finger))
405-
}
406-
None => None,
407-
}
408-
}
409-
410-
// let next_reject use the default implementation from the Searcher trait
411-
}
412-
413-
unsafe impl<'a> ReverseSearcher<'a> for AsciiCharSearcher<'a> {
414-
#[inline]
415-
fn next_back(&mut self) -> SearchStep {
416-
let old_finger = self.finger_back;
417-
let slice = unsafe { self.haystack.get_unchecked(self.finger..old_finger) };
418-
419-
let mut iter = slice.as_bytes().iter();
420-
let old_len = iter.len();
421-
if let Some(ch) = iter.next_back() {
422-
self.finger_back -= old_len - iter.len();
423-
if *ch == self.needle {
424-
SearchStep::Match(self.finger_back, old_finger)
425-
} else {
426-
SearchStep::Reject(self.finger_back, old_finger)
427-
}
428-
} else {
429-
SearchStep::Done
430-
}
431-
}
432-
433-
#[inline]
434-
fn next_match_back(&mut self) -> Option<(usize, usize)> {
435-
match memchr::memrchr(self.needle, self.haystack[self.finger..self.finger_back].as_bytes())
436-
{
437-
Some(x) => {
438-
let index = self.finger + x;
439-
self.finger_back = index;
440-
Some((self.finger_back, self.finger_back + 1))
441-
}
442-
None => None,
443-
}
444-
}
445-
446-
// let next_reject use the default implementation from the Searcher trait
447-
}
448-
449367
/// Associated type for `<char as Pattern>::Searcher<'a>`.
450368
#[derive(Clone, Debug)]
451-
pub struct UnicodeCharSearcher<'a> {
369+
pub struct CharSearcher<'a> {
452370
haystack: &'a str,
453371
// safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
454372
// This invariant can be broken *within* next_match and next_match_back, however
@@ -471,15 +389,17 @@ pub struct UnicodeCharSearcher<'a> {
471389
utf8_size: u8,
472390
/// A utf8 encoded copy of the `needle`
473391
utf8_encoded: [u8; 4],
392+
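/// `true` when the needle encodes to a single UTF-8 byte (`utf8_size == 1`), which lets `next_match`/`next_match_back` compare raw bytes against `utf8_encoded[0]`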
393+
ascii: bool,
474394
}
475395

476-
impl UnicodeCharSearcher<'_> {
396+
impl CharSearcher<'_> {
477397
fn utf8_size(&self) -> usize {
478398
self.utf8_size.into()
479399
}
480400
}
481401

482-
unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
402+
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
483403
#[inline]
484404
fn haystack(&self) -> &'a str {
485405
self.haystack
@@ -511,8 +431,23 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
511431
SearchStep::Done
512432
}
513433
}
514-
#[inline]
434+
#[inline(always)]
515435
fn next_match(&mut self) -> Option<(usize, usize)> {
436+
if self.ascii {
437+
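// SAFETY: `get_unchecked` requires `finger <= finger_back <= haystack.len()`; this relies on the searcher's `finger`/`finger_back` invariant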
438+
return match unsafe {
439+
self.haystack.as_bytes().get_unchecked(self.finger..self.finger_back)
440+
}
441+
.iter()
442+
.position(|x| *x == self.utf8_encoded[0])
443+
{
444+
Some(x) => {
445+
self.finger += x + 1;
446+
Some((self.finger - 1, self.finger))
447+
}
448+
None => None,
449+
};
450+
}
516451
loop {
517452
// get the haystack after the last character found
518453
let bytes = self.haystack.as_bytes().get(self.finger..self.finger_back)?;
@@ -532,7 +467,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
532467
//
533468
// However, this is totally okay. While we have the invariant that
534469
// self.finger is on a UTF8 boundary, this invariant is not relied upon
535-
// within this method (it is relied upon in UnicodeCharSearcher::next()).
470+
// within this method (it is relied upon in CharSearcher::next()).
536471
//
537472
// We only exit this method when we reach the end of the string, or if we
538473
// find something. When we find something the `finger` will be set
@@ -557,7 +492,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
557492
// let next_reject use the default implementation from the Searcher trait
558493
}
559494

560-
unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
495+
unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
561496
#[inline]
562497
fn next_back(&mut self) -> SearchStep {
563498
let old_finger = self.finger_back;
@@ -580,6 +515,20 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
580515
}
581516
#[inline]
582517
fn next_match_back(&mut self) -> Option<(usize, usize)> {
518+
if self.ascii {
519+
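// NOTE: no `unsafe` is involved here; the `haystack[finger..finger_back]` index below is checked and panics on an invalid range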
520+
return match memchr::memrchr(
521+
self.utf8_encoded[0],
522+
self.haystack[self.finger..self.finger_back].as_bytes(),
523+
) {
524+
Some(x) => {
525+
let index = self.finger + x;
526+
self.finger_back = index;
527+
Some((self.finger_back, self.finger_back + 1))
528+
}
529+
None => None,
530+
};
531+
}
583532
let haystack = self.haystack.as_bytes();
584533
loop {
585534
// get the haystack up to but not including the last character searched
@@ -632,57 +581,6 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
632581
}
633582

634583
impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
635-
#[derive(Clone, Debug)]
636-
///h
637-
pub enum CharSearcher<'a> {
638-
///h
639-
AsciiCharSearcher(AsciiCharSearcher<'a>),
640-
///h
641-
UnicodeCharSearcher(UnicodeCharSearcher<'a>),
642-
}
643-
unsafe impl<'a> Searcher<'a> for CharSearcher<'a> {
644-
#[inline]
645-
646-
fn haystack(&self) -> &'a str {
647-
let (Self::UnicodeCharSearcher(UnicodeCharSearcher { haystack, .. })
648-
| Self::AsciiCharSearcher(AsciiCharSearcher { haystack, .. })) = self;
649-
haystack
650-
}
651-
#[inline(always)]
652-
653-
fn next_match(&mut self) -> Option<(usize, usize)> {
654-
match self {
655-
CharSearcher::AsciiCharSearcher(x) => x.next_match(),
656-
CharSearcher::UnicodeCharSearcher(x) => x.next_match(),
657-
}
658-
}
659-
#[inline]
660-
661-
fn next(&mut self) -> SearchStep {
662-
match self {
663-
CharSearcher::AsciiCharSearcher(x) => x.next(),
664-
CharSearcher::UnicodeCharSearcher(x) => x.next(),
665-
}
666-
}
667-
}
668-
unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
669-
#[inline]
670-
671-
fn next_back(&mut self) -> SearchStep {
672-
match self {
673-
CharSearcher::AsciiCharSearcher(x) => x.next_back(),
674-
CharSearcher::UnicodeCharSearcher(x) => x.next_back(),
675-
}
676-
}
677-
#[inline]
678-
679-
fn next_match_back(&mut self) -> Option<(usize, usize)> {
680-
match self {
681-
CharSearcher::AsciiCharSearcher(x) => x.next_match_back(),
682-
CharSearcher::UnicodeCharSearcher(x) => x.next_match_back(),
683-
}
684-
}
685-
}
686584

687585
/// Searches for chars that are equal to a given [`char`].
688586
///
@@ -696,31 +594,21 @@ impl Pattern for char {
696594

697595
#[inline]
698596
fn into_searcher<'a>(self, haystack: &'a str) -> Self::Searcher<'a> {
699-
if (self as u32) < 128 {}
700597
let mut utf8_encoded = [0; MAX_LEN_UTF8];
701598
let utf8_size = self
702599
.encode_utf8(&mut utf8_encoded)
703600
.len()
704601
.try_into()
705602
.expect("char len should be less than 255");
706-
if utf8_size == 1 {
707-
CharSearcher::AsciiCharSearcher(AsciiCharSearcher {
708-
haystack,
709-
needle: utf8_encoded[0],
710-
finger: 0,
711-
finger_back: haystack.len(),
712-
// available: None,
713-
// available_back: None,
714-
})
715-
} else {
716-
CharSearcher::UnicodeCharSearcher(UnicodeCharSearcher {
717-
haystack,
718-
finger: 0,
719-
finger_back: haystack.len(),
720-
needle: self,
721-
utf8_size,
722-
utf8_encoded,
723-
})
603+
604+
CharSearcher {
605+
haystack,
606+
finger: 0,
607+
finger_back: haystack.len(),
608+
needle: self,
609+
utf8_size,
610+
utf8_encoded,
611+
ascii: utf8_size == 1,
724612
}
725613
}
726614

0 commit comments

Comments
 (0)