@@ -16,8 +16,10 @@ use memchr::{memchr, memchr2, memchr3};
16
16
use syntax:: hir:: literal:: { Literal , Literals } ;
17
17
18
18
use freqs:: BYTE_FREQUENCIES ;
19
- use self :: teddy_ssse3:: Teddy ;
19
+ use self :: teddy_avx2:: { Teddy as TeddyAVX2 } ;
20
+ use self :: teddy_ssse3:: { Teddy as TeddySSSE3 } ;
20
21
22
+ mod teddy_avx2;
21
23
mod teddy_ssse3;
22
24
23
25
/// A prefix extracted from a compiled regular expression.
@@ -47,7 +49,10 @@ enum Matcher {
47
49
AC ( FullAcAutomaton < Literal > ) ,
48
50
/// A SIMD-accelerated multiple string matcher. Used only for a small
49
51
/// number of small literals.
50
- Teddy128 ( Teddy ) ,
52
+ TeddySSSE3 ( TeddySSSE3 ) ,
53
+ /// A SIMD-accelerated multiple string matcher. Used only for a small
54
+ /// number of small literals. This uses 256-bit vectors.
55
+ TeddyAVX2 ( TeddyAVX2 ) ,
51
56
}
52
57
53
58
impl LiteralSearcher {
@@ -98,7 +103,8 @@ impl LiteralSearcher {
98
103
FreqyPacked ( ref s) => s. find ( haystack) . map ( |i| ( i, i + s. len ( ) ) ) ,
99
104
BoyerMoore ( ref s) => s. find ( haystack) . map ( |i| ( i, i + s. len ( ) ) ) ,
100
105
AC ( ref aut) => aut. find ( haystack) . next ( ) . map ( |m| ( m. start , m. end ) ) ,
101
- Teddy128 ( ref ted) => ted. find ( haystack) . map ( |m| ( m. start , m. end ) ) ,
106
+ TeddySSSE3 ( ref t) => t. find ( haystack) . map ( |m| ( m. start , m. end ) ) ,
107
+ TeddyAVX2 ( ref t) => t. find ( haystack) . map ( |m| ( m. start , m. end ) ) ,
102
108
}
103
109
}
104
110
@@ -136,8 +142,11 @@ impl LiteralSearcher {
136
142
Matcher :: FreqyPacked ( ref s) => LiteralIter :: Single ( & s. pat ) ,
137
143
Matcher :: BoyerMoore ( ref s) => LiteralIter :: Single ( & s. pattern ) ,
138
144
Matcher :: AC ( ref ac) => LiteralIter :: AC ( ac. patterns ( ) ) ,
139
- Matcher :: Teddy128 ( ref ted) => {
140
- LiteralIter :: Teddy128 ( ted. patterns ( ) )
145
+ Matcher :: TeddySSSE3 ( ref ted) => {
146
+ LiteralIter :: TeddySSSE3 ( ted. patterns ( ) )
147
+ }
148
+ Matcher :: TeddyAVX2 ( ref ted) => {
149
+ LiteralIter :: TeddyAVX2 ( ted. patterns ( ) )
141
150
}
142
151
}
143
152
}
@@ -166,7 +175,8 @@ impl LiteralSearcher {
166
175
FreqyPacked ( _) => 1 ,
167
176
BoyerMoore ( _) => 1 ,
168
177
AC ( ref aut) => aut. len ( ) ,
169
- Teddy128 ( ref ted) => ted. len ( ) ,
178
+ TeddySSSE3 ( ref ted) => ted. len ( ) ,
179
+ TeddyAVX2 ( ref ted) => ted. len ( ) ,
170
180
}
171
181
}
172
182
@@ -179,7 +189,8 @@ impl LiteralSearcher {
179
189
FreqyPacked ( ref single) => single. approximate_size ( ) ,
180
190
BoyerMoore ( ref single) => single. approximate_size ( ) ,
181
191
AC ( ref aut) => aut. heap_bytes ( ) ,
182
- Teddy128 ( ref ted) => ted. approximate_size ( ) ,
192
+ TeddySSSE3 ( ref ted) => ted. approximate_size ( ) ,
193
+ TeddyAVX2 ( ref ted) => ted. approximate_size ( ) ,
183
194
}
184
195
}
185
196
}
@@ -220,7 +231,15 @@ impl Matcher {
220
231
}
221
232
}
222
233
let is_aho_corasick_fast = sset. dense . len ( ) == 1 && sset. all_ascii ;
223
- if Teddy :: available ( ) && !is_aho_corasick_fast {
234
+ if TeddyAVX2 :: available ( ) && !is_aho_corasick_fast {
235
+ const MAX_TEDDY_LITERALS : usize = 32 ;
236
+ if lits. literals ( ) . len ( ) <= MAX_TEDDY_LITERALS {
237
+ if let Some ( ted) = TeddyAVX2 :: new ( lits) {
238
+ return Matcher :: TeddyAVX2 ( ted) ;
239
+ }
240
+ }
241
+ }
242
+ if TeddySSSE3 :: available ( ) && !is_aho_corasick_fast {
224
243
// Only try Teddy if Aho-Corasick can't use memchr on an ASCII
225
244
// byte. Also, in its current form, Teddy doesn't scale well to
226
245
// lots of literals.
@@ -232,8 +251,8 @@ impl Matcher {
232
251
// negating the benefit of memchr.
233
252
const MAX_TEDDY_LITERALS : usize = 32 ;
234
253
if lits. literals ( ) . len ( ) <= MAX_TEDDY_LITERALS {
235
- if let Some ( ted) = Teddy :: new ( lits) {
236
- return Matcher :: Teddy128 ( ted) ;
254
+ if let Some ( ted) = TeddySSSE3 :: new ( lits) {
255
+ return Matcher :: TeddySSSE3 ( ted) ;
237
256
}
238
257
}
239
258
// Fallthrough to ol' reliable Aho-Corasick...
@@ -248,7 +267,8 @@ pub enum LiteralIter<'a> {
248
267
Bytes ( & ' a [ u8 ] ) ,
249
268
Single ( & ' a [ u8 ] ) ,
250
269
AC ( & ' a [ Literal ] ) ,
251
- Teddy128 ( & ' a [ Vec < u8 > ] ) ,
270
+ TeddySSSE3 ( & ' a [ Vec < u8 > ] ) ,
271
+ TeddyAVX2 ( & ' a [ Vec < u8 > ] ) ,
252
272
}
253
273
254
274
impl < ' a > Iterator for LiteralIter < ' a > {
@@ -284,7 +304,16 @@ impl<'a> Iterator for LiteralIter<'a> {
284
304
Some ( & * * next)
285
305
}
286
306
}
287
- LiteralIter :: Teddy128 ( ref mut lits) => {
307
+ LiteralIter :: TeddySSSE3 ( ref mut lits) => {
308
+ if lits. is_empty ( ) {
309
+ None
310
+ } else {
311
+ let next = & lits[ 0 ] ;
312
+ * lits = & lits[ 1 ..] ;
313
+ Some ( & * * next)
314
+ }
315
+ }
316
+ LiteralIter :: TeddyAVX2 ( ref mut lits) => {
288
317
if lits. is_empty ( ) {
289
318
None
290
319
} else {
0 commit comments