@@ -364,91 +364,9 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364
364
// Impl for char
365
365
/////////////////////////////////////////////////////////////////////////////
366
366
367
- #[ derive( Clone , Debug ) ]
368
- /// hah
369
- pub struct AsciiCharSearcher < ' a > {
370
- haystack : & ' a str ,
371
- needle : u8 ,
372
- finger : usize ,
373
- finger_back : usize ,
374
- }
375
-
376
- unsafe impl < ' a > Searcher < ' a > for AsciiCharSearcher < ' a > {
377
- fn haystack ( & self ) -> & ' a str {
378
- self . haystack
379
- }
380
-
381
- #[ inline]
382
- fn next ( & mut self ) -> SearchStep {
383
- let byte = self . haystack . as_bytes ( ) . get ( self . finger ) ;
384
- if let Some ( & byte) = byte {
385
- self . finger += 1 ;
386
- if byte == self . needle {
387
- SearchStep :: Match ( self . finger - 1 , self . finger )
388
- } else {
389
- SearchStep :: Reject ( self . finger - 1 , self . finger )
390
- }
391
- } else {
392
- SearchStep :: Done
393
- }
394
- }
395
-
396
- #[ inline( always) ]
397
- fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
398
- match unsafe { self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back ) }
399
- . iter ( )
400
- . position ( |x| * x == self . needle )
401
- {
402
- Some ( x) => {
403
- self . finger += x + 1 ;
404
- Some ( ( self . finger - 1 , self . finger ) )
405
- }
406
- None => None ,
407
- }
408
- }
409
-
410
- // let next_reject use the default implementation from the Searcher trait
411
- }
412
-
413
- unsafe impl < ' a > ReverseSearcher < ' a > for AsciiCharSearcher < ' a > {
414
- #[ inline]
415
- fn next_back ( & mut self ) -> SearchStep {
416
- let old_finger = self . finger_back ;
417
- let slice = unsafe { self . haystack . get_unchecked ( self . finger ..old_finger) } ;
418
-
419
- let mut iter = slice. as_bytes ( ) . iter ( ) ;
420
- let old_len = iter. len ( ) ;
421
- if let Some ( ch) = iter. next_back ( ) {
422
- self . finger_back -= old_len - iter. len ( ) ;
423
- if * ch == self . needle {
424
- SearchStep :: Match ( self . finger_back , old_finger)
425
- } else {
426
- SearchStep :: Reject ( self . finger_back , old_finger)
427
- }
428
- } else {
429
- SearchStep :: Done
430
- }
431
- }
432
-
433
- #[ inline]
434
- fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
435
- match memchr:: memrchr ( self . needle , self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) )
436
- {
437
- Some ( x) => {
438
- let index = self . finger + x;
439
- self . finger_back = index;
440
- Some ( ( self . finger_back , self . finger_back + 1 ) )
441
- }
442
- None => None ,
443
- }
444
- }
445
-
446
- // let next_reject use the default implementation from the Searcher trait
447
- }
448
-
449
367
/// Associated type for `<char as Pattern>::Searcher<'a>`.
450
368
#[ derive( Clone , Debug ) ]
451
- pub struct UnicodeCharSearcher < ' a > {
369
+ pub struct CharSearcher < ' a > {
452
370
haystack : & ' a str ,
453
371
// safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
454
372
// This invariant can be broken *within* next_match and next_match_back, however
@@ -471,15 +389,17 @@ pub struct UnicodeCharSearcher<'a> {
471
389
utf8_size : u8 ,
472
390
/// A utf8 encoded copy of the `needle`
473
391
utf8_encoded : [ u8 ; 4 ] ,
392
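+ /// Whether the needle is ASCII, i.e. its UTF-8 encoding is a single byte (`utf8_size == 1`).
+ /// When set, `next_match`/`next_match_back` search for `utf8_encoded[0]` directly.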
393
+ ascii : bool ,
474
394
}
475
395
476
- impl UnicodeCharSearcher < ' _ > {
396
+ impl CharSearcher < ' _ > {
477
397
fn utf8_size ( & self ) -> usize {
478
398
self . utf8_size . into ( )
479
399
}
480
400
}
481
401
482
- unsafe impl < ' a > Searcher < ' a > for UnicodeCharSearcher < ' a > {
402
+ unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
483
403
#[ inline]
484
404
fn haystack ( & self ) -> & ' a str {
485
405
self . haystack
@@ -511,8 +431,23 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
511
431
SearchStep :: Done
512
432
}
513
433
}
514
- #[ inline]
434
+ #[ inline( always ) ]
515
435
fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
436
+ if self . ascii {
437
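+ // SAFETY: relies on the struct invariant that `finger` and `finger_back` are valid byte
+ // indices of `haystack`; the unchecked range additionally requires `finger <= finger_back`.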
438
+ return match unsafe {
439
+ self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back )
440
+ }
441
+ . iter ( )
442
+ . position ( |x| * x == self . utf8_encoded [ 0 ] )
443
+ {
444
+ Some ( x) => {
445
+ self . finger += x + 1 ;
446
+ Some ( ( self . finger - 1 , self . finger ) )
447
+ }
448
+ None => None ,
449
+ } ;
450
+ }
516
451
loop {
517
452
// get the haystack after the last character found
518
453
let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
@@ -532,7 +467,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
532
467
//
533
468
// However, this is totally okay. While we have the invariant that
534
469
// self.finger is on a UTF8 boundary, this invariant is not relied upon
535
- // within this method (it is relied upon in UnicodeCharSearcher ::next()).
470
+ // within this method (it is relied upon in CharSearcher ::next()).
536
471
//
537
472
// We only exit this method when we reach the end of the string, or if we
538
473
// find something. When we find something the `finger` will be set
@@ -557,7 +492,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
557
492
// let next_reject use the default implementation from the Searcher trait
558
493
}
559
494
560
- unsafe impl < ' a > ReverseSearcher < ' a > for UnicodeCharSearcher < ' a > {
495
+ unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
561
496
#[ inline]
562
497
fn next_back ( & mut self ) -> SearchStep {
563
498
let old_finger = self . finger_back ;
@@ -580,6 +515,20 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
580
515
}
581
516
#[ inline]
582
517
fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
518
+ if self . ascii {
519
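+ // No `unsafe` is involved here: slicing `haystack` with `finger..finger_back` is checked and
+ // panics if the range is out of bounds or does not lie on char boundaries.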
520
+ return match memchr:: memrchr (
521
+ self . utf8_encoded [ 0 ] ,
522
+ self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) ,
523
+ ) {
524
+ Some ( x) => {
525
+ let index = self . finger + x;
526
+ self . finger_back = index;
527
+ Some ( ( self . finger_back , self . finger_back + 1 ) )
528
+ }
529
+ None => None ,
530
+ } ;
531
+ }
583
532
let haystack = self . haystack . as_bytes ( ) ;
584
533
loop {
585
534
// get the haystack up to but not including the last character searched
@@ -632,57 +581,6 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
632
581
}
633
582
634
583
impl < ' a > DoubleEndedSearcher < ' a > for CharSearcher < ' a > { }
635
- #[ derive( Clone , Debug ) ]
636
- ///h
637
- pub enum CharSearcher < ' a > {
638
- ///h
639
- AsciiCharSearcher ( AsciiCharSearcher < ' a > ) ,
640
- ///h
641
- UnicodeCharSearcher ( UnicodeCharSearcher < ' a > ) ,
642
- }
643
- unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
644
- #[ inline]
645
-
646
- fn haystack ( & self ) -> & ' a str {
647
- let ( Self :: UnicodeCharSearcher ( UnicodeCharSearcher { haystack, .. } )
648
- | Self :: AsciiCharSearcher ( AsciiCharSearcher { haystack, .. } ) ) = self ;
649
- haystack
650
- }
651
- #[ inline( always) ]
652
-
653
- fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
654
- match self {
655
- CharSearcher :: AsciiCharSearcher ( x) => x. next_match ( ) ,
656
- CharSearcher :: UnicodeCharSearcher ( x) => x. next_match ( ) ,
657
- }
658
- }
659
- #[ inline]
660
-
661
- fn next ( & mut self ) -> SearchStep {
662
- match self {
663
- CharSearcher :: AsciiCharSearcher ( x) => x. next ( ) ,
664
- CharSearcher :: UnicodeCharSearcher ( x) => x. next ( ) ,
665
- }
666
- }
667
- }
668
- unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
669
- #[ inline]
670
-
671
- fn next_back ( & mut self ) -> SearchStep {
672
- match self {
673
- CharSearcher :: AsciiCharSearcher ( x) => x. next_back ( ) ,
674
- CharSearcher :: UnicodeCharSearcher ( x) => x. next_back ( ) ,
675
- }
676
- }
677
- #[ inline]
678
-
679
- fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
680
- match self {
681
- CharSearcher :: AsciiCharSearcher ( x) => x. next_match_back ( ) ,
682
- CharSearcher :: UnicodeCharSearcher ( x) => x. next_match_back ( ) ,
683
- }
684
- }
685
- }
686
584
687
585
/// Searches for chars that are equal to a given [`char`].
688
586
///
@@ -696,31 +594,21 @@ impl Pattern for char {
696
594
697
595
#[ inline]
698
596
fn into_searcher < ' a > ( self , haystack : & ' a str ) -> Self :: Searcher < ' a > {
699
- if ( self as u32 ) < 128 { }
700
597
let mut utf8_encoded = [ 0 ; MAX_LEN_UTF8 ] ;
701
598
let utf8_size = self
702
599
. encode_utf8 ( & mut utf8_encoded)
703
600
. len ( )
704
601
. try_into ( )
705
602
. expect ( "char len should be less than 255" ) ;
706
- if utf8_size == 1 {
707
- CharSearcher :: AsciiCharSearcher ( AsciiCharSearcher {
708
- haystack,
709
- needle : utf8_encoded[ 0 ] ,
710
- finger : 0 ,
711
- finger_back : haystack. len ( ) ,
712
- // available: None,
713
- // available_back: None,
714
- } )
715
- } else {
716
- CharSearcher :: UnicodeCharSearcher ( UnicodeCharSearcher {
717
- haystack,
718
- finger : 0 ,
719
- finger_back : haystack. len ( ) ,
720
- needle : self ,
721
- utf8_size,
722
- utf8_encoded,
723
- } )
603
+
604
+ CharSearcher {
605
+ haystack,
606
+ finger : 0 ,
607
+ finger_back : haystack. len ( ) ,
608
+ needle : self ,
609
+ utf8_size,
610
+ utf8_encoded,
611
+ ascii : utf8_size == 1 ,
724
612
}
725
613
}
726
614
0 commit comments