@@ -364,91 +364,9 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364
364
// Impl for char
365
365
/////////////////////////////////////////////////////////////////////////////
366
366
367
- #[ derive( Clone , Debug ) ]
368
- /// hah
369
- pub struct AsciiCharSearcher < ' a > {
370
- haystack : & ' a str ,
371
- needle : u8 ,
372
- finger : usize ,
373
- finger_back : usize ,
374
- }
375
-
376
- unsafe impl < ' a > Searcher < ' a > for AsciiCharSearcher < ' a > {
377
- fn haystack ( & self ) -> & ' a str {
378
- self . haystack
379
- }
380
-
381
- #[ inline]
382
- fn next ( & mut self ) -> SearchStep {
383
- let byte = self . haystack . as_bytes ( ) . get ( self . finger ) ;
384
- if let Some ( & byte) = byte {
385
- self . finger += 1 ;
386
- if byte == self . needle {
387
- SearchStep :: Match ( self . finger - 1 , self . finger )
388
- } else {
389
- SearchStep :: Reject ( self . finger - 1 , self . finger )
390
- }
391
- } else {
392
- SearchStep :: Done
393
- }
394
- }
395
-
396
- #[ inline( always) ]
397
- fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
398
- match unsafe { self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back ) }
399
- . iter ( )
400
- . position ( |x| * x == self . needle )
401
- {
402
- Some ( x) => {
403
- self . finger += x + 1 ;
404
- Some ( ( self . finger - 1 , self . finger ) )
405
- }
406
- None => None ,
407
- }
408
- }
409
-
410
- // let next_reject use the default implementation from the Searcher trait
411
- }
412
-
413
- unsafe impl < ' a > ReverseSearcher < ' a > for AsciiCharSearcher < ' a > {
414
- #[ inline]
415
- fn next_back ( & mut self ) -> SearchStep {
416
- let old_finger = self . finger_back ;
417
- let slice = unsafe { self . haystack . get_unchecked ( self . finger ..old_finger) } ;
418
-
419
- let mut iter = slice. as_bytes ( ) . iter ( ) ;
420
- let old_len = iter. len ( ) ;
421
- if let Some ( ch) = iter. next_back ( ) {
422
- self . finger_back -= old_len - iter. len ( ) ;
423
- if * ch == self . needle {
424
- SearchStep :: Match ( self . finger_back , old_finger)
425
- } else {
426
- SearchStep :: Reject ( self . finger_back , old_finger)
427
- }
428
- } else {
429
- SearchStep :: Done
430
- }
431
- }
432
-
433
- #[ inline]
434
- fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
435
- match memchr:: memrchr ( self . needle , self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) )
436
- {
437
- Some ( x) => {
438
- let index = self . finger + x;
439
- self . finger_back = index;
440
- Some ( ( self . finger_back , self . finger_back + 1 ) )
441
- }
442
- None => None ,
443
- }
444
- }
445
-
446
- // let next_reject use the default implementation from the Searcher trait
447
- }
448
-
449
367
/// Associated type for `<char as Pattern>::Searcher<'a>`.
450
368
#[ derive( Clone , Debug ) ]
451
- pub struct UnicodeCharSearcher < ' a > {
369
+ pub struct CharSearcher < ' a > {
452
370
haystack : & ' a str ,
453
371
// safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
454
372
// This invariant can be broken *within* next_match and next_match_back, however
@@ -473,13 +391,13 @@ pub struct UnicodeCharSearcher<'a> {
473
391
utf8_encoded : [ u8 ; 4 ] ,
474
392
}
475
393
476
// Diff note: the `-` line is the previous impl header (`UnicodeCharSearcher`), the `+` line is its
// replacement (`CharSearcher`); the method body below is unchanged context.
- impl UnicodeCharSearcher < ' _ > {
394
+ impl CharSearcher < ' _ > {
477
395
/// Returns the searcher's stored `utf8_size` field converted to `usize` via `Into`.
///
/// The field is populated in `into_searcher` from the length of the needle's UTF-8 encoding.
fn utf8_size ( & self ) -> usize {
478
396
self . utf8_size . into ( )
479
397
}
480
398
}
481
399
482
- unsafe impl < ' a > Searcher < ' a > for UnicodeCharSearcher < ' a > {
400
+ unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
483
401
#[ inline]
484
402
fn haystack ( & self ) -> & ' a str {
485
403
self . haystack
@@ -511,8 +429,23 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
511
429
SearchStep :: Done
512
430
}
513
431
}
514
- #[ inline]
432
+ #[ inline( always ) ]
515
433
fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
434
+ if self . utf8_size == 1 {
435
+ // SAFETY: invariant
436
+ return match unsafe {
437
+ self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back )
438
+ }
439
+ . iter ( )
440
+ . position ( |x| * x == self . utf8_encoded [ 0 ] )
441
+ {
442
+ Some ( x) => {
443
+ self . finger += x + 1 ;
444
+ Some ( ( self . finger - 1 , self . finger ) )
445
+ }
446
+ None => None ,
447
+ } ;
448
+ }
516
449
loop {
517
450
// get the haystack after the last character found
518
451
let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
@@ -532,7 +465,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
532
465
//
533
466
// However, this is totally okay. While we have the invariant that
534
467
// self.finger is on a UTF8 boundary, this invariant is not relied upon
535
- // within this method (it is relied upon in UnicodeCharSearcher ::next()).
468
+ // within this method (it is relied upon in CharSearcher ::next()).
536
469
//
537
470
// We only exit this method when we reach the end of the string, or if we
538
471
// find something. When we find something the `finger` will be set
@@ -557,7 +490,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
557
490
// let next_reject use the default implementation from the Searcher trait
558
491
}
559
492
560
- unsafe impl < ' a > ReverseSearcher < ' a > for UnicodeCharSearcher < ' a > {
493
+ unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
561
494
#[ inline]
562
495
fn next_back ( & mut self ) -> SearchStep {
563
496
let old_finger = self . finger_back ;
@@ -580,6 +513,20 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
580
513
}
581
514
#[ inline]
582
515
fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
516
+ if self . utf8_size == 1 {
517
+ // SAFETY: invariant
518
+ return match memchr:: memrchr (
519
+ self . utf8_encoded [ 0 ] ,
520
+ self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) ,
521
+ ) {
522
+ Some ( x) => {
523
+ let index = self . finger + x;
524
+ self . finger_back = index;
525
+ Some ( ( self . finger_back , self . finger_back + 1 ) )
526
+ }
527
+ None => None ,
528
+ } ;
529
+ }
583
530
let haystack = self . haystack . as_bytes ( ) ;
584
531
loop {
585
532
// get the haystack up to but not including the last character searched
@@ -632,57 +579,6 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
632
579
}
633
580
634
581
// `DoubleEndedSearcher` is declared with an empty trait body (it only adds the `ReverseSearcher`
// supertrait bound), so this impl has no items to provide.
impl < ' a > DoubleEndedSearcher < ' a > for CharSearcher < ' a > { }
635
- #[ derive( Clone , Debug ) ]
636
- ///h
637
- pub enum CharSearcher < ' a > {
638
- ///h
639
- AsciiCharSearcher ( AsciiCharSearcher < ' a > ) ,
640
- ///h
641
- UnicodeCharSearcher ( UnicodeCharSearcher < ' a > ) ,
642
- }
643
- unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
644
- #[ inline]
645
-
646
- fn haystack ( & self ) -> & ' a str {
647
- let ( Self :: UnicodeCharSearcher ( UnicodeCharSearcher { haystack, .. } )
648
- | Self :: AsciiCharSearcher ( AsciiCharSearcher { haystack, .. } ) ) = self ;
649
- haystack
650
- }
651
- #[ inline( always) ]
652
-
653
- fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
654
- match self {
655
- CharSearcher :: AsciiCharSearcher ( x) => x. next_match ( ) ,
656
- CharSearcher :: UnicodeCharSearcher ( x) => x. next_match ( ) ,
657
- }
658
- }
659
- #[ inline]
660
-
661
- fn next ( & mut self ) -> SearchStep {
662
- match self {
663
- CharSearcher :: AsciiCharSearcher ( x) => x. next ( ) ,
664
- CharSearcher :: UnicodeCharSearcher ( x) => x. next ( ) ,
665
- }
666
- }
667
- }
668
- unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
669
- #[ inline]
670
-
671
- fn next_back ( & mut self ) -> SearchStep {
672
- match self {
673
- CharSearcher :: AsciiCharSearcher ( x) => x. next_back ( ) ,
674
- CharSearcher :: UnicodeCharSearcher ( x) => x. next_back ( ) ,
675
- }
676
- }
677
- #[ inline]
678
-
679
- fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
680
- match self {
681
- CharSearcher :: AsciiCharSearcher ( x) => x. next_match_back ( ) ,
682
- CharSearcher :: UnicodeCharSearcher ( x) => x. next_match_back ( ) ,
683
- }
684
- }
685
- }
686
582
687
583
/// Searches for chars that are equal to a given [`char`].
688
584
///
@@ -696,31 +592,20 @@ impl Pattern for char {
696
592
697
593
/// Builds the searcher that looks for this `char` inside `haystack`.
///
/// The char is UTF-8 encoded into a `MAX_LEN_UTF8`-sized buffer, and the encoded length is
/// stored alongside it. The forward cursor (`finger`) starts at `0` and the backward cursor
/// (`finger_back`) starts at `haystack.len()`.
///
/// # Panics
///
/// Panics with "char len should be less than 255" if the encoded length does not fit the
/// type of the `utf8_size` field (field type is not visible in this hunk -- presumably `u8`;
/// TODO confirm against the struct definition).
#[ inline]
698
594
fn into_searcher < ' a > ( self , haystack : & ' a str ) -> Self :: Searcher < ' a > {
699
// Removed line: an `if` with an empty body, i.e. a statement with no effect.
- if ( self as u32 ) < 128 { }
700
595
// Scratch buffer that receives the UTF-8 encoding of `self`.
let mut utf8_encoded = [ 0 ; MAX_LEN_UTF8 ] ;
701
596
// Length of the encoding, narrowed with `try_into` to the `utf8_size` field's type.
let utf8_size = self
702
597
. encode_utf8 ( & mut utf8_encoded)
703
598
. len ( )
704
599
. try_into ( )
705
600
. expect ( "char len should be less than 255" ) ;
706
// Removed lines: the old code picked an enum variant based on `utf8_size == 1`
// (`AsciiCharSearcher` for one-byte needles, `UnicodeCharSearcher` otherwise).
- if utf8_size == 1 {
707
- CharSearcher :: AsciiCharSearcher ( AsciiCharSearcher {
708
- haystack,
709
- needle : utf8_encoded[ 0 ] ,
710
- finger : 0 ,
711
- finger_back : haystack. len ( ) ,
712
- // available: None,
713
- // available_back: None,
714
- } )
715
- } else {
716
- CharSearcher :: UnicodeCharSearcher ( UnicodeCharSearcher {
717
- haystack,
718
- finger : 0 ,
719
- finger_back : haystack. len ( ) ,
720
- needle : self ,
721
- utf8_size,
722
- utf8_encoded,
723
- } )
601
+
602
// Added lines: a single `CharSearcher` struct is now built unconditionally; the one-byte
// case is instead distinguished inside `next_match` / `next_match_back` via `utf8_size == 1`.
+ CharSearcher {
603
+ haystack,
604
+ finger : 0 ,
605
+ finger_back : haystack. len ( ) ,
606
+ needle : self ,
607
+ utf8_size,
608
+ utf8_encoded,
724
609
}
725
610
}
726
611
0 commit comments