@@ -5,15 +5,17 @@ mod tests;
5
5
6
6
/// Finds all newlines, multi-byte characters, and non-narrow characters in a
7
7
/// SourceFile.
8
- ///
9
- /// This function will use an SSE2 enhanced implementation if hardware support
10
- /// is detected at runtime.
11
8
pub ( crate ) fn analyze_source_file ( src : & str ) -> ( Vec < RelativeBytePos > , Vec < MultiByteChar > ) {
12
9
let mut lines = vec ! [ RelativeBytePos :: from_u32( 0 ) ] ;
13
10
let mut multi_byte_chars = vec ! [ ] ;
14
11
15
- // Calls the right implementation, depending on hardware support available.
16
- analyze_source_file_dispatch ( src, & mut lines, & mut multi_byte_chars) ;
12
+ analyze_source_file_generic (
13
+ src,
14
+ src. len ( ) ,
15
+ RelativeBytePos ( 0 ) ,
16
+ & mut lines,
17
+ & mut multi_byte_chars,
18
+ ) ;
17
19
18
20
// The code above optimistically registers a new line *after* each \n
19
21
// it encounters. If that point is already outside the source_file, remove
@@ -29,256 +31,6 @@ pub(crate) fn analyze_source_file(src: &str) -> (Vec<RelativeBytePos>, Vec<Multi
29
31
( lines, multi_byte_chars)
30
32
}
31
33
32
- #[ cfg( bootstrap) ]
33
- cfg_match ! {
34
- cfg( any( target_arch = "x86" , target_arch = "x86_64" ) ) => {
35
- fn analyze_source_file_dispatch(
36
- src: & str ,
37
- lines: & mut Vec <RelativeBytePos >,
38
- multi_byte_chars: & mut Vec <MultiByteChar >,
39
- ) {
40
- if is_x86_feature_detected!( "sse2" ) {
41
- unsafe {
42
- analyze_source_file_sse2( src, lines, multi_byte_chars) ;
43
- }
44
- } else {
45
- analyze_source_file_generic(
46
- src,
47
- src. len( ) ,
48
- RelativeBytePos :: from_u32( 0 ) ,
49
- lines,
50
- multi_byte_chars,
51
- ) ;
52
- }
53
- }
54
-
55
- /// Checks 16 byte chunks of text at a time. If the chunk contains
56
- /// something other than printable ASCII characters and newlines, the
57
- /// function falls back to the generic implementation. Otherwise it uses
58
- /// SSE2 intrinsics to quickly find all newlines.
59
- #[ target_feature( enable = "sse2" ) ]
60
- unsafe fn analyze_source_file_sse2(
61
- src: & str ,
62
- lines: & mut Vec <RelativeBytePos >,
63
- multi_byte_chars: & mut Vec <MultiByteChar >,
64
- ) {
65
- #[ cfg( target_arch = "x86" ) ]
66
- use std:: arch:: x86:: * ;
67
- #[ cfg( target_arch = "x86_64" ) ]
68
- use std:: arch:: x86_64:: * ;
69
-
70
- const CHUNK_SIZE : usize = 16 ;
71
-
72
- let src_bytes = src. as_bytes( ) ;
73
-
74
- let chunk_count = src. len( ) / CHUNK_SIZE ;
75
-
76
- // This variable keeps track of where we should start decoding a
77
- // chunk. If a multi-byte character spans across chunk boundaries,
78
- // we need to skip that part in the next chunk because we already
79
- // handled it.
80
- let mut intra_chunk_offset = 0 ;
81
-
82
- for chunk_index in 0 ..chunk_count {
83
- let ptr = src_bytes. as_ptr( ) as * const __m128i;
84
- // We don't know if the pointer is aligned to 16 bytes, so we
85
- // use `loadu`, which supports unaligned loading.
86
- let chunk = unsafe { _mm_loadu_si128( ptr. add( chunk_index) ) } ;
87
-
88
- // For character in the chunk, see if its byte value is < 0, which
89
- // indicates that it's part of a UTF-8 char.
90
- let multibyte_test = unsafe { _mm_cmplt_epi8( chunk, _mm_set1_epi8( 0 ) ) } ;
91
- // Create a bit mask from the comparison results.
92
- let multibyte_mask = unsafe { _mm_movemask_epi8( multibyte_test) } ;
93
-
94
- // If the bit mask is all zero, we only have ASCII chars here:
95
- if multibyte_mask == 0 {
96
- assert!( intra_chunk_offset == 0 ) ;
97
-
98
- // Check for newlines in the chunk
99
- let newlines_test = unsafe { _mm_cmpeq_epi8( chunk, _mm_set1_epi8( b'\n' as i8 ) ) } ;
100
- let mut newlines_mask = unsafe { _mm_movemask_epi8( newlines_test) } ;
101
-
102
- let output_offset = RelativeBytePos :: from_usize( chunk_index * CHUNK_SIZE + 1 ) ;
103
-
104
- while newlines_mask != 0 {
105
- let index = newlines_mask. trailing_zeros( ) ;
106
-
107
- lines. push( RelativeBytePos ( index) + output_offset) ;
108
-
109
- // Clear the bit, so we can find the next one.
110
- newlines_mask &= newlines_mask - 1 ;
111
- }
112
- } else {
113
- // The slow path.
114
- // There are multibyte chars in here, fallback to generic decoding.
115
- let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
116
- intra_chunk_offset = analyze_source_file_generic(
117
- & src[ scan_start..] ,
118
- CHUNK_SIZE - intra_chunk_offset,
119
- RelativeBytePos :: from_usize( scan_start) ,
120
- lines,
121
- multi_byte_chars,
122
- ) ;
123
- }
124
- }
125
-
126
- // There might still be a tail left to analyze
127
- let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
128
- if tail_start < src. len( ) {
129
- analyze_source_file_generic(
130
- & src[ tail_start..] ,
131
- src. len( ) - tail_start,
132
- RelativeBytePos :: from_usize( tail_start) ,
133
- lines,
134
- multi_byte_chars,
135
- ) ;
136
- }
137
- }
138
- }
139
- _ => {
140
- // The target (or compiler version) does not support SSE2 ...
141
- fn analyze_source_file_dispatch(
142
- src: & str ,
143
- lines: & mut Vec <RelativeBytePos >,
144
- multi_byte_chars: & mut Vec <MultiByteChar >,
145
- ) {
146
- analyze_source_file_generic(
147
- src,
148
- src. len( ) ,
149
- RelativeBytePos :: from_u32( 0 ) ,
150
- lines,
151
- multi_byte_chars,
152
- ) ;
153
- }
154
- }
155
- }
156
-
157
- #[ cfg( not( bootstrap) ) ]
158
- cfg_match ! {
159
- any( target_arch = "x86" , target_arch = "x86_64" ) => {
160
- fn analyze_source_file_dispatch(
161
- src: & str ,
162
- lines: & mut Vec <RelativeBytePos >,
163
- multi_byte_chars: & mut Vec <MultiByteChar >,
164
- ) {
165
- if is_x86_feature_detected!( "sse2" ) {
166
- unsafe {
167
- analyze_source_file_sse2( src, lines, multi_byte_chars) ;
168
- }
169
- } else {
170
- analyze_source_file_generic(
171
- src,
172
- src. len( ) ,
173
- RelativeBytePos :: from_u32( 0 ) ,
174
- lines,
175
- multi_byte_chars,
176
- ) ;
177
- }
178
- }
179
-
180
- /// Checks 16 byte chunks of text at a time. If the chunk contains
181
- /// something other than printable ASCII characters and newlines, the
182
- /// function falls back to the generic implementation. Otherwise it uses
183
- /// SSE2 intrinsics to quickly find all newlines.
184
- #[ target_feature( enable = "sse2" ) ]
185
- unsafe fn analyze_source_file_sse2(
186
- src: & str ,
187
- lines: & mut Vec <RelativeBytePos >,
188
- multi_byte_chars: & mut Vec <MultiByteChar >,
189
- ) {
190
- #[ cfg( target_arch = "x86" ) ]
191
- use std:: arch:: x86:: * ;
192
- #[ cfg( target_arch = "x86_64" ) ]
193
- use std:: arch:: x86_64:: * ;
194
-
195
- const CHUNK_SIZE : usize = 16 ;
196
-
197
- let src_bytes = src. as_bytes( ) ;
198
-
199
- let chunk_count = src. len( ) / CHUNK_SIZE ;
200
-
201
- // This variable keeps track of where we should start decoding a
202
- // chunk. If a multi-byte character spans across chunk boundaries,
203
- // we need to skip that part in the next chunk because we already
204
- // handled it.
205
- let mut intra_chunk_offset = 0 ;
206
-
207
- for chunk_index in 0 ..chunk_count {
208
- let ptr = src_bytes. as_ptr( ) as * const __m128i;
209
- // We don't know if the pointer is aligned to 16 bytes, so we
210
- // use `loadu`, which supports unaligned loading.
211
- let chunk = unsafe { _mm_loadu_si128( ptr. add( chunk_index) ) } ;
212
-
213
- // For character in the chunk, see if its byte value is < 0, which
214
- // indicates that it's part of a UTF-8 char.
215
- let multibyte_test = unsafe { _mm_cmplt_epi8( chunk, _mm_set1_epi8( 0 ) ) } ;
216
- // Create a bit mask from the comparison results.
217
- let multibyte_mask = unsafe { _mm_movemask_epi8( multibyte_test) } ;
218
-
219
- // If the bit mask is all zero, we only have ASCII chars here:
220
- if multibyte_mask == 0 {
221
- assert!( intra_chunk_offset == 0 ) ;
222
-
223
- // Check for newlines in the chunk
224
- let newlines_test = unsafe { _mm_cmpeq_epi8( chunk, _mm_set1_epi8( b'\n' as i8 ) ) } ;
225
- let mut newlines_mask = unsafe { _mm_movemask_epi8( newlines_test) } ;
226
-
227
- let output_offset = RelativeBytePos :: from_usize( chunk_index * CHUNK_SIZE + 1 ) ;
228
-
229
- while newlines_mask != 0 {
230
- let index = newlines_mask. trailing_zeros( ) ;
231
-
232
- lines. push( RelativeBytePos ( index) + output_offset) ;
233
-
234
- // Clear the bit, so we can find the next one.
235
- newlines_mask &= newlines_mask - 1 ;
236
- }
237
- } else {
238
- // The slow path.
239
- // There are multibyte chars in here, fallback to generic decoding.
240
- let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
241
- intra_chunk_offset = analyze_source_file_generic(
242
- & src[ scan_start..] ,
243
- CHUNK_SIZE - intra_chunk_offset,
244
- RelativeBytePos :: from_usize( scan_start) ,
245
- lines,
246
- multi_byte_chars,
247
- ) ;
248
- }
249
- }
250
-
251
- // There might still be a tail left to analyze
252
- let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
253
- if tail_start < src. len( ) {
254
- analyze_source_file_generic(
255
- & src[ tail_start..] ,
256
- src. len( ) - tail_start,
257
- RelativeBytePos :: from_usize( tail_start) ,
258
- lines,
259
- multi_byte_chars,
260
- ) ;
261
- }
262
- }
263
- }
264
- _ => {
265
- // The target (or compiler version) does not support SSE2 ...
266
- fn analyze_source_file_dispatch(
267
- src: & str ,
268
- lines: & mut Vec <RelativeBytePos >,
269
- multi_byte_chars: & mut Vec <MultiByteChar >,
270
- ) {
271
- analyze_source_file_generic(
272
- src,
273
- src. len( ) ,
274
- RelativeBytePos :: from_u32( 0 ) ,
275
- lines,
276
- multi_byte_chars,
277
- ) ;
278
- }
279
- }
280
- }
281
-
282
34
// `scan_len` determines the number of bytes in `src` to scan. Note that the
283
35
// function can read past `scan_len` if a multi-byte character starts within the
284
36
// range but extends past it. The overflow is returned by the function.
0 commit comments