10
10
11
11
use std:: cell:: RefCell ;
12
12
use std:: collections:: HashMap ;
13
+ use std:: cmp;
13
14
use std:: sync:: Arc ;
14
15
15
16
use thread_local:: CachedThreadLocal ;
@@ -27,6 +28,7 @@ use re_bytes;
27
28
use re_trait:: { RegularExpression , Slot } ;
28
29
use re_unicode;
29
30
use set;
31
+ use utf8:: next_utf8;
30
32
31
33
/// Exec manages the execution of a regular expression.
32
34
///
@@ -253,17 +255,7 @@ impl<'c> RegularExpression for ExecNoSyncStr<'c> {
253
255
/// Returns the slot count by forwarding to `slots_len()` on the wrapped
/// value held in field `0` of this newtype.
fn slots_len ( & self ) -> usize { self . 0 . slots_len ( ) }
254
256
255
257
/// Returns the offset in `text` that follows position `i`.
///
/// The lines marked `-` are the previous inline implementation: it read the
/// byte at `i`, chose a step of 1, 2, 3 or 4 from that byte's value
/// (`<= 0x7F`, `<= 0b110_11111`, `<= 0b1110_1111`, otherwise), and returned
/// `i + step`. The line marked `+` replaces that with a single call to
/// `next_utf8` on the bytes of `text`.
///
/// NOTE(review): `next_utf8` is defined outside this view; presumably it
/// performs the same leading-byte classification -- confirm, in particular
/// how it behaves when `i == text.len()`, where the removed code's direct
/// index `text.as_bytes()[i]` would panic.
fn next_after_empty ( & self , text : & str , i : usize ) -> usize {
256
- let b = text. as_bytes ( ) [ i] ;
257
- let inc = if b <= 0x7F {
258
- 1
259
- } else if b <= 0b110_11111 {
260
- 2
261
- } else if b <= 0b1110_1111 {
262
- 3
263
- } else {
264
- 4
265
- } ;
266
- i + inc
258
+ next_utf8 ( text. as_bytes ( ) , i)
267
259
}
268
260
269
261
#[ inline( always) ] // reduces constant overhead
@@ -433,15 +425,29 @@ impl<'c> RegularExpression for ExecNoSync<'c> {
433
425
}
434
426
match self . ro . match_type {
435
427
MatchType :: Literal ( ty) => {
436
- self . exec_literals ( ty, text, start) . and_then ( |( s, _) | {
437
- self . captures_nfa ( MatchNfaType :: Auto , slots, text, s)
428
+ self . exec_literals ( ty, text, start) . and_then ( |( s, e) | {
429
+ // We need the +1 here to account for lookahead
430
+ // operators.
431
+ let e = if self . ro . nfa . uses_bytes ( ) {
432
+ cmp:: min ( e + 1 , text. len ( ) )
433
+ } else {
434
+ cmp:: min ( next_utf8 ( text, e) , text. len ( ) )
435
+ } ;
436
+ self . captures_nfa ( MatchNfaType :: Auto , slots, & text[ ..e] , s)
438
437
} )
439
438
}
440
439
MatchType :: Dfa => {
441
440
match self . find_dfa_forward ( text, start) {
442
- dfa:: Result :: Match ( ( s, _) ) => {
441
+ dfa:: Result :: Match ( ( s, e) ) => {
442
+ // We need the +1 here to account for lookahead
443
+ // operators.
444
+ let e = if self . ro . nfa . uses_bytes ( ) {
445
+ cmp:: min ( e + 1 , text. len ( ) )
446
+ } else {
447
+ cmp:: min ( next_utf8 ( text, e) , text. len ( ) )
448
+ } ;
443
449
self . captures_nfa (
444
- MatchNfaType :: Auto , slots, text, s)
450
+ MatchNfaType :: Auto , slots, & text[ ..e ] , s)
445
451
}
446
452
dfa:: Result :: NoMatch => None ,
447
453
dfa:: Result :: Quit => {
0 commit comments