|
1 | 1 | //! Operations related to UTF-8 validation.
|
2 | 2 |
|
3 | 3 | use super::Utf8Error;
|
4 |
| -use crate::intrinsics::{const_eval_select, unlikely}; |
| 4 | +use crate::intrinsics::const_eval_select; |
5 | 5 |
|
6 | 6 | /// Returns the initial codepoint accumulator for the first byte.
|
7 | 7 | /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
|
@@ -243,7 +243,7 @@ const unsafe fn run_with_error_handling(
|
243 | 243 | ) -> Result<(), Utf8Error> {
|
244 | 244 | while i < bytes.len() {
|
245 | 245 | let new_st = next_state(*st, bytes[i]);
|
246 |
| - if unlikely(new_st & STATE_MASK == ST_ERROR) { |
| 246 | + if new_st & STATE_MASK == ST_ERROR { |
247 | 247 | // SAFETY: Guaranteed by the caller.
|
248 | 248 | let (valid_up_to, error_len) = unsafe { resolve_error_location(*st, bytes, i) };
|
249 | 249 | return Err(Utf8Error { valid_up_to, error_len: Some(error_len) });
|
@@ -287,7 +287,9 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
|
287 | 287 | const { assert!(ASCII_CHUNK_SIZE % MAIN_CHUNK_SIZE == 0) };
|
288 | 288 |
|
289 | 289 | let mut st = ST_ACCEPT;
|
290 |
| - let mut i = 0usize; |
| 290 | + let mut i = bytes.len() % MAIN_CHUNK_SIZE; |
| 291 | + // SAFETY: Start at initial state ACCEPT. |
| 292 | + unsafe { run_with_error_handling(&mut st, &bytes[..i], 0)? }; |
291 | 293 |
|
292 | 294 | while i + MAIN_CHUNK_SIZE <= bytes.len() {
|
293 | 295 | // Fast path: if the current state is ACCEPT, we can skip to the next non-ASCII chunk.
|
@@ -320,20 +322,16 @@ fn run_utf8_validation_rt(bytes: &[u8]) -> Result<(), Utf8Error> {
|
320 | 322 | for &b in chunk {
|
321 | 323 | new_st = next_state(new_st, b);
|
322 | 324 | }
|
323 |
| - if unlikely(new_st & STATE_MASK == ST_ERROR) { |
324 |
| - // Discard the current chunk erronous result, and reuse the trailing chunk handling to |
325 |
| - // report the error location. |
326 |
| - break; |
| 325 | + if new_st & STATE_MASK == ST_ERROR { |
| 326 | + // SAFETY: `st` is the last state after executing `bytes[..i]` without encountering any error. |
| 327 | + return unsafe { run_with_error_handling(&mut st, bytes, i) }; |
327 | 328 | }
|
328 | 329 |
|
329 | 330 | st = new_st;
|
330 | 331 | i += MAIN_CHUNK_SIZE;
|
331 | 332 | }
|
332 | 333 |
|
333 |
| - // SAFETY: `st` is the last state after executing `bytes[..i]` without encountering any error. |
334 |
| - unsafe { run_with_error_handling(&mut st, bytes, i)? }; |
335 |
| - |
336 |
| - if unlikely(st & STATE_MASK != ST_ACCEPT) { |
| 334 | + if st & STATE_MASK != ST_ACCEPT { |
337 | 335 | // SAFETY: Same as above.
|
338 | 336 | let (valid_up_to, _) = unsafe { resolve_error_location(st, bytes, bytes.len()) };
|
339 | 337 | return Err(Utf8Error { valid_up_to, error_len: None });
|
|
0 commit comments