11
11
// Original implementation taken from rust-memchr
12
12
// Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
13
13
14
-
15
-
16
14
/// A safe interface to `memchr`.
17
15
///
18
16
/// Returns the index corresponding to the first occurrence of `needle` in
32
30
/// let haystack = b"the quick brown fox";
33
31
/// assert_eq!(memchr(b'k', haystack), Some(8));
34
32
/// ```
33
+ #[ inline]
35
34
pub fn memchr ( needle : u8 , haystack : & [ u8 ] ) -> Option < usize > {
36
- // libc memchr
37
- #[ cfg( not( target_os = "windows" ) ) ]
38
- fn memchr_specific ( needle : u8 , haystack : & [ u8 ] ) -> Option < usize > {
39
- use libc;
40
-
41
- let p = unsafe {
42
- libc:: memchr (
43
- haystack. as_ptr ( ) as * const libc:: c_void ,
44
- needle as libc:: c_int ,
45
- haystack. len ( ) as libc:: size_t )
46
- } ;
47
- if p. is_null ( ) {
48
- None
49
- } else {
50
- Some ( p as usize - ( haystack. as_ptr ( ) as usize ) )
51
- }
52
- }
53
-
54
- // use fallback on windows, since it's faster
55
- #[ cfg( target_os = "windows" ) ]
56
- fn memchr_specific ( needle : u8 , haystack : & [ u8 ] ) -> Option < usize > {
57
- fallback:: memchr ( needle, haystack)
58
- }
59
-
60
- memchr_specific ( needle, haystack)
35
+ :: sys:: memchr:: memchr ( needle, haystack)
61
36
}
62
37
63
38
/// A safe interface to `memrchr`.
@@ -75,251 +50,9 @@ pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
75
50
/// let haystack = b"the quick brown fox";
76
51
/// assert_eq!(memrchr(b'o', haystack), Some(17));
77
52
/// ```
53
+ #[ inline]
78
54
pub fn memrchr ( needle : u8 , haystack : & [ u8 ] ) -> Option < usize > {
79
-
80
- #[ cfg( target_os = "linux" ) ]
81
- fn memrchr_specific ( needle : u8 , haystack : & [ u8 ] ) -> Option < usize > {
82
- use libc;
83
-
84
- // GNU's memrchr() will - unlike memchr() - error if haystack is empty.
85
- if haystack. is_empty ( ) { return None }
86
- let p = unsafe {
87
- libc:: memrchr (
88
- haystack. as_ptr ( ) as * const libc:: c_void ,
89
- needle as libc:: c_int ,
90
- haystack. len ( ) as libc:: size_t )
91
- } ;
92
- if p. is_null ( ) {
93
- None
94
- } else {
95
- Some ( p as usize - ( haystack. as_ptr ( ) as usize ) )
96
- }
97
- }
98
-
99
- #[ cfg( not( target_os = "linux" ) ) ]
100
- fn memrchr_specific ( needle : u8 , haystack : & [ u8 ] ) -> Option < usize > {
101
- fallback:: memrchr ( needle, haystack)
102
- }
103
-
104
- memrchr_specific ( needle, haystack)
105
- }
106
-
107
- #[ allow( dead_code) ]
108
- mod fallback {
109
- use cmp;
110
- use mem;
111
-
112
- const LO_U64 : u64 = 0x0101010101010101 ;
113
- const HI_U64 : u64 = 0x8080808080808080 ;
114
-
115
- // use truncation
116
- const LO_USIZE : usize = LO_U64 as usize ;
117
- const HI_USIZE : usize = HI_U64 as usize ;
118
-
119
- /// Return `true` if `x` contains any zero byte.
120
- ///
121
- /// From *Matters Computational*, J. Arndt
122
- ///
123
- /// "The idea is to subtract one from each of the bytes and then look for
124
- /// bytes where the borrow propagated all the way to the most significant
125
- /// bit."
126
- #[ inline]
127
- fn contains_zero_byte ( x : usize ) -> bool {
128
- x. wrapping_sub ( LO_USIZE ) & !x & HI_USIZE != 0
129
- }
130
-
131
- #[ cfg( target_pointer_width = "32" ) ]
132
- #[ inline]
133
- fn repeat_byte ( b : u8 ) -> usize {
134
- let mut rep = ( b as usize ) << 8 | b as usize ;
135
- rep = rep << 16 | rep;
136
- rep
137
- }
138
-
139
- #[ cfg( target_pointer_width = "64" ) ]
140
- #[ inline]
141
- fn repeat_byte ( b : u8 ) -> usize {
142
- let mut rep = ( b as usize ) << 8 | b as usize ;
143
- rep = rep << 16 | rep;
144
- rep = rep << 32 | rep;
145
- rep
146
- }
147
-
148
- /// Return the first index matching the byte `a` in `text`.
149
- pub fn memchr ( x : u8 , text : & [ u8 ] ) -> Option < usize > {
150
- // Scan for a single byte value by reading two `usize` words at a time.
151
- //
152
- // Split `text` in three parts
153
- // - unaligned initial part, before the first word aligned address in text
154
- // - body, scan by 2 words at a time
155
- // - the last remaining part, < 2 word size
156
- let len = text. len ( ) ;
157
- let ptr = text. as_ptr ( ) ;
158
- let usize_bytes = mem:: size_of :: < usize > ( ) ;
159
-
160
- // search up to an aligned boundary
161
- let align = ( ptr as usize ) & ( usize_bytes- 1 ) ;
162
- let mut offset;
163
- if align > 0 {
164
- offset = cmp:: min ( usize_bytes - align, len) ;
165
- if let Some ( index) = text[ ..offset] . iter ( ) . position ( |elt| * elt == x) {
166
- return Some ( index) ;
167
- }
168
- } else {
169
- offset = 0 ;
170
- }
171
-
172
- // search the body of the text
173
- let repeated_x = repeat_byte ( x) ;
174
-
175
- if len >= 2 * usize_bytes {
176
- while offset <= len - 2 * usize_bytes {
177
- unsafe {
178
- let u = * ( ptr. offset ( offset as isize ) as * const usize ) ;
179
- let v = * ( ptr. offset ( ( offset + usize_bytes) as isize ) as * const usize ) ;
180
-
181
- // break if there is a matching byte
182
- let zu = contains_zero_byte ( u ^ repeated_x) ;
183
- let zv = contains_zero_byte ( v ^ repeated_x) ;
184
- if zu || zv {
185
- break ;
186
- }
187
- }
188
- offset += usize_bytes * 2 ;
189
- }
190
- }
191
-
192
- // find the byte after the point the body loop stopped
193
- text[ offset..] . iter ( ) . position ( |elt| * elt == x) . map ( |i| offset + i)
194
- }
195
-
196
- /// Return the last index matching the byte `a` in `text`.
197
- pub fn memrchr ( x : u8 , text : & [ u8 ] ) -> Option < usize > {
198
- // Scan for a single byte value by reading two `usize` words at a time.
199
- //
200
- // Split `text` in three parts
201
- // - unaligned tail, after the last word aligned address in text
202
- // - body, scan by 2 words at a time
203
- // - the first remaining bytes, < 2 word size
204
- let len = text. len ( ) ;
205
- let ptr = text. as_ptr ( ) ;
206
- let usize_bytes = mem:: size_of :: < usize > ( ) ;
207
-
208
- // search to an aligned boundary
209
- let end_align = ( ptr as usize + len) & ( usize_bytes - 1 ) ;
210
- let mut offset;
211
- if end_align > 0 {
212
- offset = if end_align >= len { 0 } else { len - end_align } ;
213
- if let Some ( index) = text[ offset..] . iter ( ) . rposition ( |elt| * elt == x) {
214
- return Some ( offset + index) ;
215
- }
216
- } else {
217
- offset = len;
218
- }
219
-
220
- // search the body of the text
221
- let repeated_x = repeat_byte ( x) ;
222
-
223
- while offset >= 2 * usize_bytes {
224
- unsafe {
225
- let u = * ( ptr. offset ( offset as isize - 2 * usize_bytes as isize ) as * const usize ) ;
226
- let v = * ( ptr. offset ( offset as isize - usize_bytes as isize ) as * const usize ) ;
227
-
228
- // break if there is a matching byte
229
- let zu = contains_zero_byte ( u ^ repeated_x) ;
230
- let zv = contains_zero_byte ( v ^ repeated_x) ;
231
- if zu || zv {
232
- break ;
233
- }
234
- }
235
- offset -= 2 * usize_bytes;
236
- }
237
-
238
- // find the byte before the point the body loop stopped
239
- text[ ..offset] . iter ( ) . rposition ( |elt| * elt == x)
240
- }
241
-
242
- // test fallback implementations on all platforms
243
- #[ test]
244
- fn matches_one ( ) {
245
- assert_eq ! ( Some ( 0 ) , memchr( b'a' , b"a" ) ) ;
246
- }
247
-
248
- #[ test]
249
- fn matches_begin ( ) {
250
- assert_eq ! ( Some ( 0 ) , memchr( b'a' , b"aaaa" ) ) ;
251
- }
252
-
253
- #[ test]
254
- fn matches_end ( ) {
255
- assert_eq ! ( Some ( 4 ) , memchr( b'z' , b"aaaaz" ) ) ;
256
- }
257
-
258
- #[ test]
259
- fn matches_nul ( ) {
260
- assert_eq ! ( Some ( 4 ) , memchr( b'\x00' , b"aaaa\x00 " ) ) ;
261
- }
262
-
263
- #[ test]
264
- fn matches_past_nul ( ) {
265
- assert_eq ! ( Some ( 5 ) , memchr( b'z' , b"aaaa\x00 z" ) ) ;
266
- }
267
-
268
- #[ test]
269
- fn no_match_empty ( ) {
270
- assert_eq ! ( None , memchr( b'a' , b"" ) ) ;
271
- }
272
-
273
- #[ test]
274
- fn no_match ( ) {
275
- assert_eq ! ( None , memchr( b'a' , b"xyz" ) ) ;
276
- }
277
-
278
- #[ test]
279
- fn matches_one_reversed ( ) {
280
- assert_eq ! ( Some ( 0 ) , memrchr( b'a' , b"a" ) ) ;
281
- }
282
-
283
- #[ test]
284
- fn matches_begin_reversed ( ) {
285
- assert_eq ! ( Some ( 3 ) , memrchr( b'a' , b"aaaa" ) ) ;
286
- }
287
-
288
- #[ test]
289
- fn matches_end_reversed ( ) {
290
- assert_eq ! ( Some ( 0 ) , memrchr( b'z' , b"zaaaa" ) ) ;
291
- }
292
-
293
- #[ test]
294
- fn matches_nul_reversed ( ) {
295
- assert_eq ! ( Some ( 4 ) , memrchr( b'\x00' , b"aaaa\x00 " ) ) ;
296
- }
297
-
298
- #[ test]
299
- fn matches_past_nul_reversed ( ) {
300
- assert_eq ! ( Some ( 0 ) , memrchr( b'z' , b"z\x00 aaaa" ) ) ;
301
- }
302
-
303
- #[ test]
304
- fn no_match_empty_reversed ( ) {
305
- assert_eq ! ( None , memrchr( b'a' , b"" ) ) ;
306
- }
307
-
308
- #[ test]
309
- fn no_match_reversed ( ) {
310
- assert_eq ! ( None , memrchr( b'a' , b"xyz" ) ) ;
311
- }
312
-
313
- #[ test]
314
- fn each_alignment_reversed ( ) {
315
- let mut data = [ 1u8 ; 64 ] ;
316
- let needle = 2 ;
317
- let pos = 40 ;
318
- data[ pos] = needle;
319
- for start in 0 ..16 {
320
- assert_eq ! ( Some ( pos - start) , memrchr( needle, & data[ start..] ) ) ;
321
- }
322
- }
55
+ :: sys:: memchr:: memrchr ( needle, haystack)
323
56
}
324
57
325
58
#[ cfg( test) ]
0 commit comments